From: Thomas Pornin Date: Tue, 31 Jul 2018 21:00:26 +0000 (+0200) Subject: Added RSA key generation code (i15, i31, i62). X-Git-Tag: v0.6~6 X-Git-Url: https://bearssl.org/gitweb//home/git/?a=commitdiff_plain;h=491a45337de8dc0a4c100abf33f5c0e187a08afd;p=BearSSL Added RSA key generation code (i15, i31, i62). --- diff --git a/inc/bearssl_rsa.h b/inc/bearssl_rsa.h index ac0415f..7c79d33 100644 --- a/inc/bearssl_rsa.h +++ b/inc/bearssl_rsa.h @@ -84,7 +84,7 @@ extern "C" { * random masking, but "true" constant-time code. * * - They support only private keys with two prime factors. RSA private - * key with three or more prime factors are nominally supported, but + * keys with three or more prime factors are nominally supported, but * rarely used; they may offer faster operations, at the expense of * more code and potentially a reduction in security if there are * "too many" prime factors. @@ -92,7 +92,7 @@ extern "C" { * - The public exponent may have arbitrary length. Of course, it is * a good idea to keep public exponents small, so that public key * operations are fast; but, contrary to some widely deployed - * implementations, BearSSL has no problem with public exponent + * implementations, BearSSL has no problem with public exponents * longer than 32 bits. * * - The two prime factors of the modulus need not have the same length @@ -170,7 +170,7 @@ typedef struct { /** * \brief RSA private key. * - * The structure references the primvate factors, reduced private + * The structure references the private factors, reduced private * exponents, and CRT coefficient. It also contains the bit length of * the modulus. The big integers use unsigned big-endian representation; * extra leading bytes of value 0 are allowed. However, the modulus bit @@ -1032,6 +1032,158 @@ uint32_t br_rsa_i62_oaep_decrypt( const br_hash_class *dig, const void *label, size_t label_len, const br_rsa_private_key *sk, void *data, size_t *len); +/** + * \brief Get buffer size to hold RSA private key elements. + * + * This macro returns the length (in bytes) of the buffer needed to + * receive the elements of a RSA private key, as generated by one of + * the `br_rsa_*_keygen()` functions. If the provided size is a constant + * expression, then the whole macro evaluates to a constant expression. + * + * \param size target key size (modulus size, in bits) + * \return the length of the private key buffer, in bytes. + */ +#define BR_RSA_KBUF_PRIV_SIZE(size) (5 * (((size) + 15) >> 4)) + +/** + * \brief Get buffer size to hold RSA public key elements. + * + * This macro returns the length (in bytes) of the buffer needed to + * receive the elements of a RSA public key, as generated by one of + * the `br_rsa_*_keygen()` functions. If the provided size is a constant + * expression, then the whole macro evaluates to a constant expression. + * + * \param size target key size (modulus size, in bits) + * \return the length of the public key buffer, in bytes. + */ +#define BR_RSA_KBUF_PUB_SIZE(size) (4 + (((size) + 7) >> 3)) + +/** + * \brief Type for RSA key pair generator implementation. + * + * This function generates a new RSA key pair whose modulus has bit + * length `size` bits. The private key elements are written in the + * `kbuf_priv` buffer, and pointer values and length fields to these + * elements are populated in the provided private key structure `sk`. + * Similarly, the public key elements are written in `kbuf_pub`, with + * pointers and lengths set in `pk`. + * + * If `sk` is `NULL`, then `kbuf_pub` may be `NULL`, and only the + * private key is set. + * + * If `pubexp` is not zero, then its value will be used as public + * exponent. Valid RSA public exponent values are odd integers + * greater than 1. If `pubexp` is zero, then the public exponent will + * have value 3. + * + * The provided PRNG (`rng_ctx`) must have already been initialized + * and seeded. + * + * Returned value is 1 on success, 0 on error. An error is reported + * if the requested range is outside of the supported key sizes, or + * if an invalid non-zero public exponent value is provided. Supported + * range starts at 512 bits, and up to an implementation-defined + * maximum (by default 4096 bits). Note that key sizes up to 768 bits + * have been broken in practice, and sizes lower than 2048 bits are + * usually considered to be weak and should not be used. + * + * \param rng_ctx source PRNG context (already initialized) + * \param sk RSA private key structure (destination) + * \param kbuf_priv buffer for private key elements + * \param pk RSA public key structure (destination), or `NULL` + * \param kbuf_pub buffer for public key elements, or `NULL` + * \param size target RSA modulus size (in bits) + * \param pubexp public exponent to use, or zero + * \return 1 on success, 0 on error (invalid parameters) + */ +typedef uint32_t (*br_rsa_keygen)( + const br_prng_class **rng_ctx, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp); + +/** + * \brief RSA key pair generation with the "i15" engine. + * + * \see br_rsa_keygen + * + * \param rng_ctx source PRNG context (already initialized) + * \param sk RSA private key structure (destination) + * \param kbuf_priv buffer for private key elements + * \param pk RSA public key structure (destination), or `NULL` + * \param kbuf_pub buffer for public key elements, or `NULL` + * \param size target RSA modulus size (in bits) + * \param pubexp public exponent to use, or zero + * \return 1 on success, 0 on error (invalid parameters) + */ +uint32_t br_rsa_i15_keygen( + const br_prng_class **rng_ctx, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp); + +/** + * \brief RSA key pair generation with the "i31" engine. + * + * \see br_rsa_keygen + * + * \param rng_ctx source PRNG context (already initialized) + * \param sk RSA private key structure (destination) + * \param kbuf_priv buffer for private key elements + * \param pk RSA public key structure (destination), or `NULL` + * \param kbuf_pub buffer for public key elements, or `NULL` + * \param size target RSA modulus size (in bits) + * \param pubexp public exponent to use, or zero + * \return 1 on success, 0 on error (invalid parameters) + */ +uint32_t br_rsa_i31_keygen( + const br_prng_class **rng_ctx, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp); + +/** + * \brief RSA key pair generation with the "i62" engine. + * + * This function is defined only on architecture that offer a 64x64->128 + * opcode. Use `br_rsa_i62_keygen_get()` to dynamically obtain a pointer + * to that function. + * + * \see br_rsa_keygen + * + * \param rng_ctx source PRNG context (already initialized) + * \param sk RSA private key structure (destination) + * \param kbuf_priv buffer for private key elements + * \param pk RSA public key structure (destination), or `NULL` + * \param kbuf_pub buffer for public key elements, or `NULL` + * \param size target RSA modulus size (in bits) + * \param pubexp public exponent to use, or zero + * \return 1 on success, 0 on error (invalid parameters) + */ +uint32_t br_rsa_i62_keygen( + const br_prng_class **rng_ctx, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp); + +/** + * \brief Get the RSA "i62" implementation (key pair generation), + * if available. + * + * \return the implementation, or 0. + */ +br_rsa_keygen br_rsa_i62_keygen_get(void); + +/** + * \brief Get "default" RSA implementation (key pair generation). + * + * This returns the preferred implementation of RSA (key pair generation) + * on the current system. + * + * \return the default implementation. + */ +br_rsa_keygen br_rsa_keygen_get_default(void); + #ifdef __cplusplus } #endif diff --git a/mk/Rules.mk b/mk/Rules.mk index 7a2fac2..1ffc020 100644 --- a/mk/Rules.mk +++ b/mk/Rules.mk @@ -72,6 +72,7 @@ OBJ = \ $(OBJDIR)$Pi15_encode$O \ $(OBJDIR)$Pi15_fmont$O \ $(OBJDIR)$Pi15_iszero$O \ + $(OBJDIR)$Pi15_moddiv$O \ $(OBJDIR)$Pi15_modpow$O \ $(OBJDIR)$Pi15_modpow2$O \ $(OBJDIR)$Pi15_montmul$O \ @@ -90,6 +91,7 @@ OBJ = \ $(OBJDIR)$Pi31_encode$O \ $(OBJDIR)$Pi31_fmont$O \ $(OBJDIR)$Pi31_iszero$O \ + $(OBJDIR)$Pi31_moddiv$O \ $(OBJDIR)$Pi31_modpow$O \ $(OBJDIR)$Pi31_modpow2$O \ $(OBJDIR)$Pi31_montmul$O \ @@ -122,18 +124,22 @@ OBJ = \ $(OBJDIR)$Phmac_ct$O \ $(OBJDIR)$Phmac_drbg$O \ $(OBJDIR)$Psysrng$O \ + $(OBJDIR)$Prsa_default_keygen$O \ $(OBJDIR)$Prsa_default_oaep_decrypt$O \ $(OBJDIR)$Prsa_default_oaep_encrypt$O \ $(OBJDIR)$Prsa_default_pkcs1_sign$O \ $(OBJDIR)$Prsa_default_pkcs1_vrfy$O \ $(OBJDIR)$Prsa_default_priv$O \ $(OBJDIR)$Prsa_default_pub$O \ + $(OBJDIR)$Prsa_i15_keygen$O \ $(OBJDIR)$Prsa_i15_oaep_decrypt$O \ $(OBJDIR)$Prsa_i15_oaep_encrypt$O \ $(OBJDIR)$Prsa_i15_pkcs1_sign$O \ $(OBJDIR)$Prsa_i15_pkcs1_vrfy$O \ $(OBJDIR)$Prsa_i15_priv$O \ $(OBJDIR)$Prsa_i15_pub$O \ + $(OBJDIR)$Prsa_i31_keygen$O \ + $(OBJDIR)$Prsa_i31_keygen_inner$O \ $(OBJDIR)$Prsa_i31_oaep_decrypt$O \ $(OBJDIR)$Prsa_i31_oaep_encrypt$O \ $(OBJDIR)$Prsa_i31_pkcs1_sign$O \ @@ -146,6 +152,7 @@ OBJ = \ $(OBJDIR)$Prsa_i32_pkcs1_vrfy$O \ $(OBJDIR)$Prsa_i32_priv$O \ $(OBJDIR)$Prsa_i32_pub$O \ + $(OBJDIR)$Prsa_i62_keygen$O \ $(OBJDIR)$Prsa_i62_oaep_decrypt$O \ $(OBJDIR)$Prsa_i62_oaep_encrypt$O \ $(OBJDIR)$Prsa_i62_pkcs1_sign$O \ @@ -544,6 +551,9 @@ $(OBJDIR)$Pi15_fmont$O: src$Pint$Pi15_fmont.c $(HEADERSPRIV) $(OBJDIR)$Pi15_iszero$O: src$Pint$Pi15_iszero.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi15_iszero$O src$Pint$Pi15_iszero.c +$(OBJDIR)$Pi15_moddiv$O: src$Pint$Pi15_moddiv.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi15_moddiv$O src$Pint$Pi15_moddiv.c + $(OBJDIR)$Pi15_modpow$O: src$Pint$Pi15_modpow.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi15_modpow$O src$Pint$Pi15_modpow.c @@ -598,6 +608,9 @@ $(OBJDIR)$Pi31_fmont$O: src$Pint$Pi31_fmont.c $(HEADERSPRIV) $(OBJDIR)$Pi31_iszero$O: src$Pint$Pi31_iszero.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi31_iszero$O src$Pint$Pi31_iszero.c +$(OBJDIR)$Pi31_moddiv$O: src$Pint$Pi31_moddiv.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi31_moddiv$O src$Pint$Pi31_moddiv.c + $(OBJDIR)$Pi31_modpow$O: src$Pint$Pi31_modpow.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pi31_modpow$O src$Pint$Pi31_modpow.c @@ -694,6 +707,9 @@ $(OBJDIR)$Phmac_drbg$O: src$Prand$Phmac_drbg.c $(HEADERSPRIV) $(OBJDIR)$Psysrng$O: src$Prand$Psysrng.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Psysrng$O src$Prand$Psysrng.c +$(OBJDIR)$Prsa_default_keygen$O: src$Prsa$Prsa_default_keygen.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_default_keygen$O src$Prsa$Prsa_default_keygen.c + $(OBJDIR)$Prsa_default_oaep_decrypt$O: src$Prsa$Prsa_default_oaep_decrypt.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_default_oaep_decrypt$O src$Prsa$Prsa_default_oaep_decrypt.c @@ -712,6 +728,9 @@ $(OBJDIR)$Prsa_default_priv$O: src$Prsa$Prsa_default_priv.c $(HEADERSPRIV) $(OBJDIR)$Prsa_default_pub$O: src$Prsa$Prsa_default_pub.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_default_pub$O src$Prsa$Prsa_default_pub.c +$(OBJDIR)$Prsa_i15_keygen$O: src$Prsa$Prsa_i15_keygen.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i15_keygen$O src$Prsa$Prsa_i15_keygen.c + $(OBJDIR)$Prsa_i15_oaep_decrypt$O: src$Prsa$Prsa_i15_oaep_decrypt.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i15_oaep_decrypt$O src$Prsa$Prsa_i15_oaep_decrypt.c @@ -730,6 +749,12 @@ $(OBJDIR)$Prsa_i15_priv$O: src$Prsa$Prsa_i15_priv.c $(HEADERSPRIV) $(OBJDIR)$Prsa_i15_pub$O: src$Prsa$Prsa_i15_pub.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i15_pub$O src$Prsa$Prsa_i15_pub.c +$(OBJDIR)$Prsa_i31_keygen$O: src$Prsa$Prsa_i31_keygen.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i31_keygen$O src$Prsa$Prsa_i31_keygen.c + +$(OBJDIR)$Prsa_i31_keygen_inner$O: src$Prsa$Prsa_i31_keygen_inner.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i31_keygen_inner$O src$Prsa$Prsa_i31_keygen_inner.c + $(OBJDIR)$Prsa_i31_oaep_decrypt$O: src$Prsa$Prsa_i31_oaep_decrypt.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i31_oaep_decrypt$O src$Prsa$Prsa_i31_oaep_decrypt.c @@ -766,6 +791,9 @@ $(OBJDIR)$Prsa_i32_priv$O: src$Prsa$Prsa_i32_priv.c $(HEADERSPRIV) $(OBJDIR)$Prsa_i32_pub$O: src$Prsa$Prsa_i32_pub.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i32_pub$O src$Prsa$Prsa_i32_pub.c +$(OBJDIR)$Prsa_i62_keygen$O: src$Prsa$Prsa_i62_keygen.c $(HEADERSPRIV) + $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i62_keygen$O src$Prsa$Prsa_i62_keygen.c + $(OBJDIR)$Prsa_i62_oaep_decrypt$O: src$Prsa$Prsa_i62_oaep_decrypt.c $(HEADERSPRIV) $(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Prsa_i62_oaep_decrypt$O src$Prsa$Prsa_i62_oaep_decrypt.c diff --git a/mk/mkrules.sh b/mk/mkrules.sh index 44d6148..038d8c0 100755 --- a/mk/mkrules.sh +++ b/mk/mkrules.sh @@ -120,6 +120,7 @@ coresrc=" \ src/int/i15_encode.c \ src/int/i15_fmont.c \ src/int/i15_iszero.c \ + src/int/i15_moddiv.c \ src/int/i15_modpow.c \ src/int/i15_modpow2.c \ src/int/i15_montmul.c \ @@ -138,6 +139,7 @@ coresrc=" \ src/int/i31_encode.c \ src/int/i31_fmont.c \ src/int/i31_iszero.c \ + src/int/i31_moddiv.c \ src/int/i31_modpow.c \ src/int/i31_modpow2.c \ src/int/i31_montmul.c \ @@ -170,18 +172,22 @@ coresrc=" \ src/mac/hmac_ct.c \ src/rand/hmac_drbg.c \ src/rand/sysrng.c \ + src/rsa/rsa_default_keygen.c \ src/rsa/rsa_default_oaep_decrypt.c \ src/rsa/rsa_default_oaep_encrypt.c \ src/rsa/rsa_default_pkcs1_sign.c \ src/rsa/rsa_default_pkcs1_vrfy.c \ src/rsa/rsa_default_priv.c \ src/rsa/rsa_default_pub.c \ + src/rsa/rsa_i15_keygen.c \ src/rsa/rsa_i15_oaep_decrypt.c \ src/rsa/rsa_i15_oaep_encrypt.c \ src/rsa/rsa_i15_pkcs1_sign.c \ src/rsa/rsa_i15_pkcs1_vrfy.c \ src/rsa/rsa_i15_priv.c \ src/rsa/rsa_i15_pub.c \ + src/rsa/rsa_i31_keygen.c \ + src/rsa/rsa_i31_keygen_inner.c \ src/rsa/rsa_i31_oaep_decrypt.c \ src/rsa/rsa_i31_oaep_encrypt.c \ src/rsa/rsa_i31_pkcs1_sign.c \ @@ -194,6 +200,7 @@ coresrc=" \ src/rsa/rsa_i32_pkcs1_vrfy.c \ src/rsa/rsa_i32_priv.c \ src/rsa/rsa_i32_pub.c \ + src/rsa/rsa_i62_keygen.c \ src/rsa/rsa_i62_oaep_decrypt.c \ src/rsa/rsa_i62_oaep_encrypt.c \ src/rsa/rsa_i62_pkcs1_sign.c \ diff --git a/src/inner.h b/src/inner.h index 20211d7..c507102 100644 --- a/src/inner.h +++ b/src/inner.h @@ -48,10 +48,22 @@ * already set their root keys to RSA-4096, so we should be able to * process such keys. * - * This value MUST be a multiple of 64. + * This value MUST be a multiple of 64. This value MUST NOT exceed 47666 + * (some computations in RSA key generation rely on the factor size being + * no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a + * lot of sense anyway. */ #define BR_MAX_RSA_SIZE 4096 +/* + * Minimum size for a RSA modulus (in bits); this value is used only to + * filter out invalid parameters for key pair generation. Normally, + * applications should not use RSA keys smaller than 2048 bits; but some + * specific cases might need shorter keys, for legacy or research + * purposes. + */ +#define BR_MIN_RSA_SIZE 512 + /* * Maximum size for a RSA factor (in bits). This is for RSA private-key * operations. Default is to support factors up to a bit more than half @@ -1474,6 +1486,23 @@ uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen, */ void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b); +/* + * Compute x/y mod m, result in x. Values x and y must be between 0 and + * m-1, and have the same announced bit length as m. Modulus m must be + * odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't' + * must point to a temporary area that can hold at least three integers + * of the size of m. + * + * m may not overlap x and y. x and y may overlap each other (this can + * be useful to test whether a value is invertible modulo m). t must be + * disjoint from all other arrays. + * + * Returned value is 1 on success, 0 otherwise. Success is attained if + * y is invertible modulo m. + */ +uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y, + const uint32_t *m, uint32_t m0i, uint32_t *t); + /* ==================================================================== */ /* @@ -1528,9 +1557,36 @@ void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m); void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b); +uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i, uint16_t *t); + +/* + * Variant of br_i31_modpow_opt() that internally uses 64x64->128 + * multiplications. It expects the same parameters as br_i31_modpow_opt(), + * except that the temporaries should be 64-bit integers, not 32-bit + * integers. + */ uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen, const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen); +/* + * Type for a function with the same API as br_i31_modpow_opt() (some + * implementations of this type may have stricter alignment requirements + * on the temporaries). + */ +typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen); + +/* + * Wrapper for br_i62_modpow_opt() that uses the same type as + * br_i31_modpow_opt(); however, it requires its 'tmp' argument to the + * 64-bit aligned. + */ +uint32_t br_i62_modpow_opt_as_i31(uint32_t *x, + const unsigned char *e, size_t elen, + const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen); + /* ==================================================================== */ static inline size_t @@ -1912,6 +1968,15 @@ uint32_t br_rsa_oaep_unpad(const br_hash_class *dig, void br_mgf1_xor(void *data, size_t len, const br_hash_class *dig, const void *seed, size_t seed_len); +/* + * Inner function for RSA key generation; used by the "i31" and "i62" + * implementations. + */ +uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31); + /* ==================================================================== */ /* * Elliptic curves. diff --git a/src/int/i15_moddiv.c b/src/int/i15_moddiv.c new file mode 100644 index 0000000..45af756 --- /dev/null +++ b/src/int/i15_moddiv.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * In this file, we handle big integers with a custom format, i.e. + * without the usual one-word header. Value is split into 15-bit words, + * each stored in a 16-bit slot (top bit is zero) in little-endian + * order. The length (in words) is provided explicitly. In some cases, + * the value can be negative (using two's complement representation). In + * some cases, the top word is allowed to have a 16th bit. + */ + +/* + * Negate big integer conditionally. The value consists of 'len' words, + * with 15 bits in each word (the top bit of each word should be 0, + * except possibly for the last word). If 'ctl' is 1, the negation is + * computed; otherwise, if 'ctl' is 0, then the value is unchanged. + */ +static void +cond_negate(uint16_t *a, size_t len, uint32_t ctl) +{ + size_t k; + uint32_t cc, xm; + + cc = ctl; + xm = 0x7FFF & -ctl; + for (k = 0; k < len; k ++) { + uint32_t aw; + + aw = a[k]; + aw = (aw ^ xm) + cc; + a[k] = aw & 0x7FFF; + cc = (aw >> 15) & 1; + } +} + +/* + * Finish modular reduction. Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] may use 16 bits. + * + * Also, modulus m must be odd. + */ +static void +finish_mod(uint16_t *a, size_t len, const uint16_t *m, uint32_t neg) +{ + size_t k; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. + */ + cc = 0; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = m[k]; + cc = (aw - mw - cc) >> 31; + } + + /* + * At this point: + * if neg = 1, then we must add m (regardless of cc) + * if neg = 0 and cc = 0, then we must subtract m + * if neg = 0 and cc = 1, then we must do nothing + */ + xm = 0x7FFF & -neg; + ym = -(neg | (1 - cc)); + cc = neg; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = (m[k] ^ xm) & ym; + aw = aw - mw - cc; + a[k] = aw & 0x7FFF; + cc = aw >> 31; + } +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^15) + * b <- (a*qa+b*qb)/(2^15) + * The division is assumed to be exact (i.e. the low word is dropped). + * If the final a is negative, then it is negated. Similarly for b. + * Returned value is the combination of two bits: + * bit 0: 1 if a had to be negated, 0 otherwise + * bit 1: 1 if b had to be negated, 0 otherwise + * + * Factors pa, pb, qa and qb must be at most 2^15 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 16th bit. + */ +static uint32_t +co_reduce(uint16_t *a, uint16_t *b, size_t len, + int32_t pa, int32_t pb, int32_t qa, int32_t qb) +{ + size_t k; + int32_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (k = 0; k < len; k ++) { + uint32_t wa, wb, za, zb; + uint16_t tta, ttb; + + /* + * Since: + * |pa| <= 2^15 + * |pb| <= 2^15 + * 0 <= wa <= 2^15 - 1 + * 0 <= wb <= 2^15 - 1 + * |cca| <= 2^16 - 1 + * Then: + * |za| <= (2^15-1)*(2^16) + (2^16-1) = 2^31 - 1 + * + * Thus, the new value of cca is such that |cca| <= 2^16 - 1. + * The same applies to ccb. + */ + wa = a[k]; + wb = b[k]; + za = wa * (uint32_t)pa + wb * (uint32_t)pb + (uint32_t)cca; + zb = wa * (uint32_t)qa + wb * (uint32_t)qb + (uint32_t)ccb; + if (k > 0) { + a[k - 1] = za & 0x7FFF; + b[k - 1] = zb & 0x7FFF; + } + tta = za >> 15; + ttb = zb >> 15; + cca = *(int16_t *)&tta; + ccb = *(int16_t *)&ttb; + } + a[len - 1] = (uint16_t)cca; + b[len - 1] = (uint16_t)ccb; + nega = (uint32_t)cca >> 31; + negb = (uint32_t)ccb >> 31; + cond_negate(a, len, nega); + cond_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^15) mod m + * b <- (a*qa+b*qb)/(2^15) mod m + * + * m0i is equal to -1/m[0] mod 2^15. + * + * Factors pa, pb, qa and qb must be at most 2^15 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 16th bit. + */ +static void +co_reduce_mod(uint16_t *a, uint16_t *b, size_t len, + int32_t pa, int32_t pb, int32_t qa, int32_t qb, + const uint16_t *m, uint16_t m0i) +{ + size_t k; + int32_t cca, ccb, fa, fb; + + cca = 0; + ccb = 0; + fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFF; + fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFF; + for (k = 0; k < len; k ++) { + uint32_t wa, wb, za, zb; + uint32_t tta, ttb; + + /* + * In this loop, carries 'cca' and 'ccb' always fit on + * 17 bits (in absolute value). + */ + wa = a[k]; + wb = b[k]; + za = wa * (uint32_t)pa + wb * (uint32_t)pb + + m[k] * (uint32_t)fa + (uint32_t)cca; + zb = wa * (uint32_t)qa + wb * (uint32_t)qb + + m[k] * (uint32_t)fb + (uint32_t)ccb; + if (k > 0) { + a[k - 1] = za & 0x7FFF; + b[k - 1] = zb & 0x7FFF; + } + + /* + * The XOR-and-sub construction below does an arithmetic + * right shift in a portable way (technically, right-shifting + * a negative signed value is implementation-defined in C). + */ +#define M ((uint32_t)1 << 16) + tta = za >> 15; + ttb = zb >> 15; + tta = (tta ^ M) - M; + ttb = (ttb ^ M) - M; + cca = *(int32_t *)&tta; + ccb = *(int32_t *)&ttb; +#undef M + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top word of 'a' and 'b' may have a 16-th bit set. + * We may have to add or subtract the modulus. + */ + finish_mod(a, len, m, (uint32_t)cca >> 31); + finish_mod(b, len, m, (uint32_t)ccb >> 31); +} + +/* see inner.h */ +uint32_t +br_i15_moddiv(uint16_t *x, const uint16_t *y, const uint16_t *m, uint16_t m0i, + uint16_t *t) +{ + /* + * Algorithm is an extended binary GCD. We maintain four values + * a, b, u and v, with the following invariants: + * + * a * x = y * u mod m + * b * x = y * v mod m + * + * Starting values are: + * + * a = y + * b = m + * u = x + * v = 0 + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then a <- a/2 and u <- u/2 mod m. + * - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m. + * - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m. + * - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m. + * + * Algorithm stops when a = b. At that point, they both are equal + * to GCD(y,m); the modular division succeeds if that value is 1. + * The result of the modular division is then u (or v: both are + * equal at that point). + * + * Each step makes either a or b shrink by at least one bit; hence, + * if m has bit length k bits, then 2k-2 steps are sufficient. + * + * + * Though complexity is quadratic in the size of m, the bit-by-bit + * processing is not very efficient. We can speed up processing by + * remarking that the decisions are taken based only on observation + * of the top and low bits of a and b. + * + * In the loop below, at each iteration, we use the two top words + * of a and b, and the low words of a and b, to compute reduction + * parameters pa, pb, qa and qb such that the new values for a + * and b are: + * + * a' = (a*pa + b*pb) / (2^15) + * b' = (a*qa + b*qb) / (2^15) + * + * the division being exact. + * + * Since the choices are based on the top words, they may be slightly + * off, requiring an optional correction: if a' < 0, then we replace + * pa with -pa, and pb with -pb. The total length of a and b is + * thus reduced by at least 14 bits at each iteration. + * + * The stopping conditions are still the same, though: when a + * and b become equal, they must be both odd (since m is odd, + * the GCD cannot be even), therefore the next operation is a + * subtraction, and one of the values becomes 0. At that point, + * nothing else happens, i.e. one value is stuck at 0, and the + * other one is the GCD. + */ + size_t len, k; + uint16_t *a, *b, *u, *v; + uint32_t num, r; + + len = (m[0] + 15) >> 4; + a = t; + b = a + len; + u = x + 1; + v = b + len; + memcpy(a, y + 1, len * sizeof *y); + memcpy(b, m + 1, len * sizeof *m); + memset(v, 0, len * sizeof *v); + + /* + * Loop below ensures that a and b are reduced by some bits each, + * for a total of at least 14 bits. + */ + for (num = ((m[0] - (m[0] >> 4)) << 1) + 14; num >= 14; num -= 14) { + size_t j; + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint32_t a_hi, b_hi, a_lo, b_lo; + int32_t pa, pb, qa, qb; + int i; + + /* + * Extract top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we want + * (a[j] << 15) + a[j - 1], and (b[j] << 15) + b[j - 1]. + * If a and b are down to one word each, then we use a[0] + * and b[0]. + */ + c0 = (uint32_t)-1; + c1 = (uint32_t)-1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0xFFFF) >> 16) - (uint32_t)1; + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word. It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = (a0 << 15) + a1; + b_hi = (b0 << 15) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiple of 2^15, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 15; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t r, oa, ob, cAB, cBA, cA; + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a is divided by 2, 0 otherwise + * + * Rules: + * + * cAB and cBA cannot be both 1. + * if a is not divided by 2, b is. + */ + r = GT(a_hi, b_hi); + oa = (a_lo >> i) & 1; + ob = (b_lo >> i) & 1; + cAB = oa & ob & r; + cBA = oa & ob & NOT(r); + cA = cAB | NOT(oa); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -cAB; + pa -= qa & -(int32_t)cAB; + pb -= qb & -(int32_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -cBA; + qa -= pa & -(int32_t)cBA; + qb -= pb & -(int32_t)cBA; + + /* + * Shifting. + */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int32_t)cA - 1); + pb += pb & ((int32_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -cA; + b_lo += b_lo & -cA; + qa += qa & -(int32_t)cA; + qb += qb & -(int32_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & (cA - 1); + } + + /* + * Replace a and b with new values a' and b'. + */ + r = co_reduce(a, b, len, pa, pb, qa, qb); + pa -= pa * ((r & 1) << 1); + pb -= pb * ((r & 1) << 1); + qa -= qa * (r & 2); + qb -= qb * (r & 2); + co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i); + } + + /* + * Now one of the arrays should be 0, and the other contains + * the GCD. If a is 0, then u is 0 as well, and v contains + * the division result. + * Result is correct if and only if GCD is 1. + */ + r = (a[0] | b[0]) ^ 1; + u[0] |= v[0]; + for (k = 1; k < len; k ++) { + r |= a[k] | b[k]; + u[k] |= v[k]; + } + return EQ0(r); +} diff --git a/src/int/i31_moddiv.c b/src/int/i31_moddiv.c new file mode 100644 index 0000000..9950591 --- /dev/null +++ b/src/int/i31_moddiv.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * In this file, we handle big integers with a custom format, i.e. + * without the usual one-word header. Value is split into 31-bit words, + * each stored in a 32-bit slot (top bit is zero) in little-endian + * order. The length (in words) is provided explicitly. In some cases, + * the value can be negative (using two's complement representation). In + * some cases, the top word is allowed to have a 32th bit. + */ + +/* + * Negate big integer conditionally. The value consists of 'len' words, + * with 31 bits in each word (the top bit of each word should be 0, + * except possibly for the last word). If 'ctl' is 1, the negation is + * computed; otherwise, if 'ctl' is 0, then the value is unchanged. + */ +static void +cond_negate(uint32_t *a, size_t len, uint32_t ctl) +{ + size_t k; + uint32_t cc, xm; + + cc = ctl; + xm = -ctl >> 1; + for (k = 0; k < len; k ++) { + uint32_t aw; + + aw = a[k]; + aw = (aw ^ xm) + cc; + a[k] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Finish modular reduction. Rules on input parameters: + * + * if neg = 1, then -m <= a < 0 + * if neg = 0, then 0 <= a < 2*m + * + * If neg = 0, then the top word of a[] may use 32 bits. + * + * Also, modulus m must be odd. + */ +static void +finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg) +{ + size_t k; + uint32_t cc, xm, ym; + + /* + * First pass: compare a (assumed nonnegative) with m. + * Note that if the final word uses the top extra bit, then + * subtracting m must yield a value less than 2^31, since we + * assumed that a < 2*m. + */ + cc = 0; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = m[k]; + cc = (aw - mw - cc) >> 31; + } + + /* + * At this point: + * if neg = 1, then we must add m (regardless of cc) + * if neg = 0 and cc = 0, then we must subtract m + * if neg = 0 and cc = 1, then we must do nothing + */ + xm = -neg >> 1; + ym = -(neg | (1 - cc)); + cc = neg; + for (k = 0; k < len; k ++) { + uint32_t aw, mw; + + aw = a[k]; + mw = (m[k] ^ xm) & ym; + aw = aw - mw - cc; + a[k] = aw & 0x7FFFFFFF; + cc = aw >> 31; + } +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^31) + * b <- (a*qa+b*qb)/(2^31) + * The division is assumed to be exact (i.e. the low word is dropped). + * If the final a is negative, then it is negated. Similarly for b. + * Returned value is the combination of two bits: + * bit 0: 1 if a had to be negated, 0 otherwise + * bit 1: 1 if b had to be negated, 0 otherwise + * + * Factors pa, pb, qa and qb must be at most 2^31 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 32th bit. + */ +static uint32_t +co_reduce(uint32_t *a, uint32_t *b, size_t len, + int64_t pa, int64_t pb, int64_t qa, int64_t qb) +{ + size_t k; + int64_t cca, ccb; + uint32_t nega, negb; + + cca = 0; + ccb = 0; + for (k = 0; k < len; k ++) { + uint32_t wa, wb; + uint64_t za, zb; + uint64_t tta, ttb; + + /* + * Since: + * |pa| <= 2^31 + * |pb| <= 2^31 + * 0 <= wa <= 2^31 - 1 + * 0 <= wb <= 2^31 - 1 + * |cca| <= 2^32 - 1 + * Then: + * |za| <= (2^31-1)*(2^32) + (2^32-1) = 2^63 - 1 + * + * Thus, the new value of cca is such that |cca| <= 2^32 - 1. + * The same applies to ccb. + */ + wa = a[k]; + wb = b[k]; + za = wa * (uint64_t)pa + wb * (uint64_t)pb + (uint64_t)cca; + zb = wa * (uint64_t)qa + wb * (uint64_t)qb + (uint64_t)ccb; + if (k > 0) { + a[k - 1] = za & 0x7FFFFFFF; + b[k - 1] = zb & 0x7FFFFFFF; + } + + /* + * For the new values of cca and ccb, we need a signed + * right-shift; since, in C, right-shifting a signed + * negative value is implementation-defined, we use a + * custom portable sign extension expression. + */ +#define M ((uint64_t)1 << 32) + tta = za >> 31; + ttb = zb >> 31; + tta = (tta ^ M) - M; + ttb = (ttb ^ M) - M; + cca = *(int64_t *)&tta; + ccb = *(int64_t *)&ttb; +#undef M + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + nega = (uint32_t)((uint64_t)cca >> 63); + negb = (uint32_t)((uint64_t)ccb >> 63); + cond_negate(a, len, nega); + cond_negate(b, len, negb); + return nega | (negb << 1); +} + +/* + * Compute: + * a <- (a*pa+b*pb)/(2^31) mod m + * b <- (a*qa+b*qb)/(2^31) mod m + * + * m0i is equal to -1/m[0] mod 2^31. + * + * Factors pa, pb, qa and qb must be at most 2^31 in absolute value. + * Source integers a and b must be nonnegative; top word is not allowed + * to contain an extra 32th bit. + */ +static void +co_reduce_mod(uint32_t *a, uint32_t *b, size_t len, + int64_t pa, int64_t pb, int64_t qa, int64_t qb, + const uint32_t *m, uint32_t m0i) +{ + size_t k; + int64_t cca, ccb; + uint32_t fa, fb; + + cca = 0; + ccb = 0; + fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFFFFFF; + fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFFFFFF; + for (k = 0; k < len; k ++) { + uint32_t wa, wb; + uint64_t za, zb; + uint64_t tta, ttb; + + /* + * In this loop, carries 'cca' and 'ccb' always fit on + * 33 bits (in absolute value). + */ + wa = a[k]; + wb = b[k]; + za = wa * (uint64_t)pa + wb * (uint64_t)pb + + m[k] * (uint64_t)fa + (uint64_t)cca; + zb = wa * (uint64_t)qa + wb * (uint64_t)qb + + m[k] * (uint64_t)fb + (uint64_t)ccb; + if (k > 0) { + a[k - 1] = (uint32_t)za & 0x7FFFFFFF; + b[k - 1] = (uint32_t)zb & 0x7FFFFFFF; + } + +#define M ((uint64_t)1 << 32) + tta = za >> 31; + ttb = zb >> 31; + tta = (tta ^ M) - M; + ttb = (ttb ^ M) - M; + cca = *(int64_t *)&tta; + ccb = *(int64_t *)&ttb; +#undef M + } + a[len - 1] = (uint32_t)cca; + b[len - 1] = (uint32_t)ccb; + + /* + * At this point: + * -m <= a < 2*m + * -m <= b < 2*m + * (this is a case of Montgomery reduction) + * The top word of 'a' and 'b' may have a 32-th bit set. + * We may have to add or subtract the modulus. + */ + finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63)); + finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63)); +} + +/* see inner.h */ +uint32_t +br_i31_moddiv(uint32_t *x, const uint32_t *y, const uint32_t *m, uint32_t m0i, + uint32_t *t) +{ + /* + * Algorithm is an extended binary GCD. We maintain four values + * a, b, u and v, with the following invariants: + * + * a * x = y * u mod m + * b * x = y * v mod m + * + * Starting values are: + * + * a = y + * b = m + * u = x + * v = 0 + * + * The formal definition of the algorithm is a sequence of steps: + * + * - If a is even, then a <- a/2 and u <- u/2 mod m. + * - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m. + * - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m. + * - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m. + * + * Algorithm stops when a = b. At that point, they both are equal + * to GCD(y,m); the modular division succeeds if that value is 1. + * The result of the modular division is then u (or v: both are + * equal at that point). + * + * Each step makes either a or b shrink by at least one bit; hence, + * if m has bit length k bits, then 2k-2 steps are sufficient. + * + * + * Though complexity is quadratic in the size of m, the bit-by-bit + * processing is not very efficient. We can speed up processing by + * remarking that the decisions are taken based only on observation + * of the top and low bits of a and b. + * + * In the loop below, at each iteration, we use the two top words + * of a and b, and the low words of a and b, to compute reduction + * parameters pa, pb, qa and qb such that the new values for a + * and b are: + * + * a' = (a*pa + b*pb) / (2^31) + * b' = (a*qa + b*qb) / (2^31) + * + * the division being exact. + * + * Since the choices are based on the top words, they may be slightly + * off, requiring an optional correction: if a' < 0, then we replace + * pa with -pa, and pb with -pb. The total length of a and b is + * thus reduced by at least 30 bits at each iteration. + * + * The stopping conditions are still the same, though: when a + * and b become equal, they must be both odd (since m is odd, + * the GCD cannot be even), therefore the next operation is a + * subtraction, and one of the values becomes 0. At that point, + * nothing else happens, i.e. one value is stuck at 0, and the + * other one is the GCD. + */ + size_t len, k; + uint32_t *a, *b, *u, *v; + uint32_t num, r; + + len = (m[0] + 31) >> 5; + a = t; + b = a + len; + u = x + 1; + v = b + len; + memcpy(a, y + 1, len * sizeof *y); + memcpy(b, m + 1, len * sizeof *m); + memset(v, 0, len * sizeof *v); + + /* + * Loop below ensures that a and b are reduced by some bits each, + * for a total of at least 30 bits. + */ + for (num = ((m[0] - (m[0] >> 5)) << 1) + 30; num >= 30; num -= 30) { + size_t j; + uint32_t c0, c1; + uint32_t a0, a1, b0, b1; + uint64_t a_hi, b_hi; + uint32_t a_lo, b_lo; + int64_t pa, pb, qa, qb; + int i; + + /* + * Extract top words of a and b. If j is the highest + * index >= 1 such that a[j] != 0 or b[j] != 0, then we want + * (a[j] << 31) + a[j - 1], and (b[j] << 31) + b[j - 1]. + * If a and b are down to one word each, then we use a[0] + * and b[0]. + */ + c0 = (uint32_t)-1; + c1 = (uint32_t)-1; + a0 = 0; + a1 = 0; + b0 = 0; + b1 = 0; + j = len; + while (j -- > 0) { + uint32_t aw, bw; + + aw = a[j]; + bw = b[j]; + a0 ^= (a0 ^ aw) & c0; + a1 ^= (a1 ^ aw) & c1; + b0 ^= (b0 ^ bw) & c0; + b1 ^= (b1 ^ bw) & c1; + c1 = c0; + c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1; + } + + /* + * If c1 = 0, then we grabbed two words for a and b. + * If c1 != 0 but c0 = 0, then we grabbed one word. It + * is not possible that c1 != 0 and c0 != 0, because that + * would mean that both integers are zero. + */ + a1 |= a0 & c1; + a0 &= ~c1; + b1 |= b0 & c1; + b0 &= ~c1; + a_hi = ((uint64_t)a0 << 31) + a1; + b_hi = ((uint64_t)b0 << 31) + b1; + a_lo = a[0]; + b_lo = b[0]; + + /* + * Compute reduction factors: + * + * a' = a*pa + b*pb + * b' = a*qa + b*qb + * + * such that a' and b' are both multiple of 2^31, but are + * only marginally larger than a and b. + */ + pa = 1; + pb = 0; + qa = 0; + qb = 1; + for (i = 0; i < 31; i ++) { + /* + * At each iteration: + * + * a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi + * b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi + * a <- a/2 if: a is even + * b <- b/2 if: a is odd, b is even + * + * We multiply a_lo and b_lo by 2 at each + * iteration, thus a division by 2 really is a + * non-multiplication by 2. + */ + uint32_t r, oa, ob, cAB, cBA, cA; + uint64_t rz; + + /* + * r = GT(a_hi, b_hi) + * But the GT() function works on uint32_t operands, + * so we inline a 64-bit version here. + */ + rz = b_hi - a_hi; + r = (uint32_t)((rz ^ ((a_hi ^ b_hi) + & (a_hi ^ rz))) >> 63); + + /* + * cAB = 1 if b must be subtracted from a + * cBA = 1 if a must be subtracted from b + * cA = 1 if a is divided by 2, 0 otherwise + * + * Rules: + * + * cAB and cBA cannot be both 1. + * if a is not divided by 2, b is. + */ + oa = (a_lo >> i) & 1; + ob = (b_lo >> i) & 1; + cAB = oa & ob & r; + cBA = oa & ob & NOT(r); + cA = cAB | NOT(oa); + + /* + * Conditional subtractions. + */ + a_lo -= b_lo & -cAB; + a_hi -= b_hi & -(uint64_t)cAB; + pa -= qa & -(int64_t)cAB; + pb -= qb & -(int64_t)cAB; + b_lo -= a_lo & -cBA; + b_hi -= a_hi & -(uint64_t)cBA; + qa -= pa & -(int64_t)cBA; + qb -= pb & -(int64_t)cBA; + + /* + * Shifting. + */ + a_lo += a_lo & (cA - 1); + pa += pa & ((int64_t)cA - 1); + pb += pb & ((int64_t)cA - 1); + a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA; + b_lo += b_lo & -cA; + qa += qa & -(int64_t)cA; + qb += qb & -(int64_t)cA; + b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1); + } + + /* + * Replace a and b with new values a' and b'. + */ + r = co_reduce(a, b, len, pa, pb, qa, qb); + pa -= pa * ((r & 1) << 1); + pb -= pb * ((r & 1) << 1); + qa -= qa * (r & 2); + qb -= qb * (r & 2); + co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i); + } + + /* + * Now one of the arrays should be 0, and the other contains + * the GCD. If a is 0, then u is 0 as well, and v contains + * the division result. + * Result is correct if and only if GCD is 1. + */ + r = (a[0] | b[0]) ^ 1; + u[0] |= v[0]; + for (k = 1; k < len; k ++) { + r |= a[k] | b[k]; + u[k] |= v[k]; + } + return EQ0(r); +} diff --git a/src/int/i62_modpow2.c b/src/int/i62_modpow2.c index 0414e5f..2db537f 100644 --- a/src/int/i62_modpow2.c +++ b/src/int/i62_modpow2.c @@ -244,7 +244,7 @@ br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen, mw62num = (mw31num + 1) >> 1; /* - * In order to apply this function, we must have enough room tp + * In order to apply this function, we must have enough room to * copy the operand and modulus into the temporary array, along * with at least two temporaries. If there is not enough room, * switch to br_i31_modpow(). We also use br_i31_modpow() if the @@ -477,3 +477,17 @@ br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen, } #endif + +/* see inner.h */ +uint32_t +br_i62_modpow_opt_as_i31(uint32_t *x31, const unsigned char *e, size_t elen, + const uint32_t *m31, uint32_t m0i31, uint32_t *tmp, size_t twlen) +{ + /* + * As documented, this function expects the 'tmp' argument to be + * 64-bit aligned. This is OK since this function is internal (it + * is not part of BearSSL's public API). + */ + return br_i62_modpow_opt(x31, e, elen, m31, m0i31, + (uint64_t *)tmp, twlen >> 1); +} diff --git a/src/rsa/rsa_default_keygen.c b/src/rsa/rsa_default_keygen.c new file mode 100644 index 0000000..f2e83c8 --- /dev/null +++ b/src/rsa/rsa_default_keygen.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +br_rsa_keygen +br_rsa_keygen_get_default(void) +{ +#if BR_INT128 || BR_UMUL128 + return &br_rsa_i62_keygen; +#elif BR_LOMUL + return &br_rsa_i15_keygen; +#else + return &br_rsa_i31_keygen; +#endif +} diff --git a/src/rsa/rsa_i15_keygen.c b/src/rsa/rsa_i15_keygen.c new file mode 100644 index 0000000..0f4435f --- /dev/null +++ b/src/rsa/rsa_i15_keygen.c @@ -0,0 +1,583 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Make a random integer of the provided size. The size is encoded. + * The header word is untouched. + */ +static void +mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize) +{ + size_t u, len; + unsigned m; + + len = (esize + 15) >> 4; + (*rng)->generate(rng, x + 1, len * sizeof(uint16_t)); + for (u = 1; u < len; u ++) { + x[u] &= 0x7FFF; + } + m = esize & 15; + if (m == 0) { + x[len] &= 0x7FFF; + } else { + x[len] &= 0x7FFF >> (15 - m); + } +} + +/* + * This is the big-endian unsigned representation of the product of + * all small primes from 13 to 1481. + */ +static const unsigned char SMALL_PRIMES[] = { + 0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A, + 0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7, + 0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37, + 0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5, + 0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E, + 0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6, + 0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C, + 0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40, + 0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50, + 0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7, + 0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3, + 0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E, + 0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC, + 0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08, + 0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B, + 0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22, + 0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77, + 0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E, + 0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80, + 0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8, + 0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2, + 0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC, + 0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54, + 0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74, + 0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C, + 0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD +}; + +/* + * We need temporary values for at least 7 integers of the same size + * as a factor (including header word); more space helps with performance + * (in modular exponentiations), but we much prefer to remain under + * 2 kilobytes in total, to save stack space. The macro TEMPS below + * exceeds 1024 (which is a count in 16-bit words) when BR_MAX_RSA_SIZE + * is greater than 4350 (default value is 4096, so the 2-kB limit is + * maintained unless BR_MAX_RSA_SIZE was modified). + */ +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define TEMPS MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15)) + +/* + * Perform trial division on a candidate prime. This computes + * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The + * br_i15_moddiv() function will report an error if y is not invertible + * modulo x. Returned value is 1 on success (none of the small primes + * divides x), 0 on error (a non-trivial GCD is obtained). + * + * This function assumes that x is odd. + */ +static uint32_t +trial_divisions(const uint16_t *x, uint16_t *t) +{ + uint16_t *y; + uint16_t x0i; + + y = t; + t += 1 + ((x[0] + 15) >> 4); + x0i = br_i15_ninv15(x[1]); + br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x); + return br_i15_moddiv(y, y, x, x0i, t); +} + +/* + * Perform n rounds of Miller-Rabin on the candidate prime x. This + * function assumes that x = 3 mod 4. + * + * Returned value is 1 on success (all rounds completed successfully), + * 0 otherwise. + */ +static uint32_t +miller_rabin(const br_prng_class **rng, const uint16_t *x, int n, + uint16_t *t, size_t tlen) +{ + /* + * Since x = 3 mod 4, the Miller-Rabin test is simple: + * - get a random base a (such that 1 < a < x-1) + * - compute z = a^((x-1)/2) mod x + * - if z != 1 and z != x-1, the number x is composite + * + * We generate bases 'a' randomly with a size which is + * one bit less than x, which ensures that a < x-1. It + * is not useful to verify that a > 1 because the probability + * that we get a value a equal to 0 or 1 is much smaller + * than the probability of our Miller-Rabin tests not to + * detect a composite, which is already quite smaller than the + * probability of the hardware misbehaving and return a + * composite integer because of some glitch (e.g. bad RAM + * or ill-timed cosmic ray). + */ + unsigned char *xm1d2; + size_t xlen, xm1d2_len, xm1d2_len_u16, u; + uint32_t asize; + unsigned cc; + uint16_t x0i; + + /* + * Compute (x-1)/2 (encoded). + */ + xm1d2 = (unsigned char *)t; + xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3; + br_i15_encode(xm1d2, xm1d2_len, x); + cc = 0; + for (u = 0; u < xm1d2_len; u ++) { + unsigned w; + + w = xm1d2[u]; + xm1d2[u] = (unsigned char)((w >> 1) | cc); + cc = w << 7; + } + + /* + * We used some words of the provided buffer for (x-1)/2. + */ + xm1d2_len_u16 = (xm1d2_len + 1) >> 1; + t += xm1d2_len_u16; + tlen -= xm1d2_len_u16; + + xlen = (x[0] + 15) >> 4; + asize = x[0] - 1 - EQ0(x[0] & 15); + x0i = br_i15_ninv15(x[1]); + while (n -- > 0) { + uint16_t *a; + uint32_t eq1, eqm1; + + /* + * Generate a random base. We don't need the base to be + * really uniform modulo x, so we just get a random + * number which is one bit shorter than x. + */ + a = t; + a[0] = x[0]; + a[xlen] = 0; + mkrand(rng, a, asize); + + /* + * Compute a^((x-1)/2) mod x. We assume here that the + * function will not fail (the temporary array is large + * enough). + */ + br_i15_modpow_opt(a, xm1d2, xm1d2_len, + x, x0i, t + 1 + xlen, tlen - 1 - xlen); + + /* + * We must obtain either 1 or x-1. Note that x is odd, + * hence x-1 differs from x only in its low word (no + * carry). + */ + eq1 = a[1] ^ 1; + eqm1 = a[1] ^ (x[1] - 1); + for (u = 2; u <= xlen; u ++) { + eq1 |= a[u]; + eqm1 |= a[u] ^ x[u]; + } + + if ((EQ0(eq1) | EQ0(eqm1)) == 0) { + return 0; + } + } + return 1; +} + +/* + * Create a random prime of the provided size. 'size' is the _encoded_ + * bit length. The two top bits and the two bottom bits are set to 1. + */ +static void +mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize, + uint32_t pubexp, uint16_t *t, size_t tlen) +{ + size_t len; + + x[0] = esize; + len = (esize + 15) >> 4; + for (;;) { + size_t u; + uint32_t m3, m5, m7, m11; + int rounds; + + /* + * Generate random bits. We force the two top bits and the + * two bottom bits to 1. + */ + mkrand(rng, x, esize); + if ((esize & 15) == 0) { + x[len] |= 0x6000; + } else if ((esize & 15) == 1) { + x[len] |= 0x0001; + x[len - 1] |= 0x4000; + } else { + x[len] |= 0x0003 << ((esize & 15) - 2); + } + x[1] |= 0x0003; + + /* + * Trial division with low primes (3, 5, 7 and 11). We + * use the following properties: + * + * 2^2 = 1 mod 3 + * 2^4 = 1 mod 5 + * 2^3 = 1 mod 7 + * 2^10 = 1 mod 11 + */ + m3 = 0; + m5 = 0; + m7 = 0; + m11 = 0; + for (u = 0; u < len; u ++) { + uint32_t w; + + w = x[1 + u]; + m3 += w << (u & 1); + m3 = (m3 & 0xFF) + (m3 >> 8); + m5 += w << ((4 - u) & 3); + m5 = (m5 & 0xFF) + (m5 >> 8); + m7 += w; + m7 = (m7 & 0x1FF) + (m7 >> 9); + m11 += w << (5 & -(u & 1)); + m11 = (m11 & 0x3FF) + (m11 >> 10); + } + + /* + * Maximum values of m* at this point: + * m3: 511 + * m5: 2310 + * m7: 510 + * m11: 2047 + * We use the same properties to make further reductions. + */ + + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 46 */ + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 16 */ + m3 = ((m3 * 43) >> 5) & 3; + + m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 263 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 30 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 15 */ + m5 -= 10 & -GT(m5, 9); + m5 -= 5 & -GT(m5, 4); + + m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 69 */ + m7 = (m7 & 7) + (m7 >> 3); /* max: 14 */ + m7 = ((m7 * 147) >> 7) & 7; + + /* + * 2^5 = 32 = -1 mod 11. + */ + m11 = (m11 & 0x1F) + 66 - (m11 >> 5); /* max: 97 */ + m11 -= 88 & -GT(m11, 87); + m11 -= 44 & -GT(m11, 43); + m11 -= 22 & -GT(m11, 21); + m11 -= 11 & -GT(m11, 10); + + /* + * If any of these modulo is 0, then the candidate is + * not prime. Also, if pubexp is 3, 5, 7 or 11, and the + * corresponding modulus is 1, then the candidate must + * be rejected, because we need e to be invertible + * modulo p-1. We can use simple comparisons here + * because they won't leak information on a candidate + * that we keep, only on one that we reject (and is thus + * not secret). + */ + if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) { + continue; + } + if ((pubexp == 3 && m3 == 1) + || (pubexp == 5 && m5 == 5) + || (pubexp == 7 && m5 == 7) + || (pubexp == 11 && m5 == 11)) + { + continue; + } + + /* + * More trial divisions. + */ + if (!trial_divisions(x, t)) { + continue; + } + + /* + * Miller-Rabin algorithm. Since we selected a random + * integer, not a maliciously crafted integer, we can use + * relatively few rounds to lower the risk of a false + * positive (i.e. declaring prime a non-prime) under + * 2^(-80). It is not useful to lower the probability much + * below that, since that would be substantially below + * the probability of the hardware misbehaving. Sufficient + * numbers of rounds are extracted from the Handbook of + * Applied Cryptography, note 4.49 (page 149). + * + * Since we work on the encoded size (esize), we need to + * compare with encoded thresholds. + */ + if (esize < 320) { + rounds = 12; + } else if (esize < 480) { + rounds = 9; + } else if (esize < 693) { + rounds = 6; + } else if (esize < 906) { + rounds = 4; + } else if (esize < 1386) { + rounds = 3; + } else { + rounds = 2; + } + + if (miller_rabin(rng, x, rounds, t, tlen)) { + return; + } + } +} + +/* + * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided + * as parameter (with announced bit length equal to that of p). This + * function computes d = 1/e mod p-1 (for an odd integer e). Returned + * value is 1 on success, 0 on error (an error is reported if e is not + * invertible modulo p-1). + * + * The temporary buffer (t) must have room for at least 4 integers of + * the size of p. + */ +static uint32_t +invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t) +{ + uint16_t *f; + uint32_t r; + + f = t; + t += 1 + ((m[0] + 15) >> 4); + + /* + * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd. + */ + br_i15_zero(d, m[0]); + d[1] = 1; + br_i15_zero(f, m[0]); + f[1] = e & 0x7FFF; + f[2] = (e >> 15) & 0x7FFF; + f[3] = e >> 30; + r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t); + + /* + * We really want d = 1/e mod p-1, with p = 2m. By the CRT, + * the result is either the d we got, or d + m. + * + * Let's write e*d = 1 + k*m, for some integer k. Integers e + * and m are odd. If d is odd, then e*d is odd, which implies + * that k must be even; in that case, e*d = 1 + (k/2)*2m, and + * thus d is already fine. Conversely, if d is even, then k + * is odd, and we must add m to d in order to get the correct + * result. + */ + br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1))); + + return r; +} + +/* + * Swap two buffers in RAM. They must be disjoint. + */ +static void +bufswap(void *b1, void *b2, size_t len) +{ + size_t u; + unsigned char *buf1, *buf2; + + buf1 = b1; + buf2 = b2; + for (u = 0; u < len; u ++) { + unsigned w; + + w = buf1[u]; + buf1[u] = buf2[u]; + buf2[u] = w; + } +} + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + uint32_t esize_p, esize_q; + size_t plen, qlen, tlen; + uint16_t *p, *q, *t; + uint16_t tmp[TEMPS]; + uint32_t r; + + if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) { + return 0; + } + if (pubexp == 0) { + pubexp = 3; + } else if (pubexp == 1 || (pubexp & 1) == 0) { + return 0; + } + + esize_p = (size + 1) >> 1; + esize_q = size - esize_p; + sk->n_bitlen = size; + sk->p = kbuf_priv; + sk->plen = (esize_p + 7) >> 3; + sk->q = sk->p + sk->plen; + sk->qlen = (esize_q + 7) >> 3; + sk->dp = sk->q + sk->qlen; + sk->dplen = sk->plen; + sk->dq = sk->dp + sk->dplen; + sk->dqlen = sk->qlen; + sk->iq = sk->dq + sk->dqlen; + sk->iqlen = sk->plen; + + if (pk != NULL) { + pk->n = kbuf_pub; + pk->nlen = (size + 7) >> 3; + pk->e = pk->n + pk->nlen; + pk->elen = 4; + br_enc32be(pk->e, pubexp); + while (*pk->e == 0) { + pk->e ++; + pk->elen --; + } + } + + /* + * We now switch to encoded sizes. + * + * floor((x * 17477) / (2^18)) is equal to floor(x/15) for all + * integers x from 0 to 23833. + */ + esize_p += MUL15(esize_p, 17477) >> 18; + esize_q += MUL15(esize_q, 17477) >> 18; + plen = (esize_p + 15) >> 4; + qlen = (esize_q + 15) >> 4; + p = tmp; + q = p + 1 + plen; + t = q + 1 + qlen; + tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen); + + /* + * When looking for primes p and q, we temporarily divide + * candidates by 2, in order to compute the inverse of the + * public exponent. + */ + + for (;;) { + mkprime(rng, p, esize_p, pubexp, t, tlen); + br_i15_rshift(p, 1); + if (invert_pubexp(t, p, pubexp, t + 1 + plen)) { + br_i15_add(p, p, 1); + p[1] |= 1; + br_i15_encode(sk->p, sk->plen, p); + br_i15_encode(sk->dp, sk->dplen, t); + break; + } + } + + for (;;) { + mkprime(rng, q, esize_q, pubexp, t, tlen); + br_i15_rshift(q, 1); + if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) { + br_i15_add(q, q, 1); + q[1] |= 1; + br_i15_encode(sk->q, sk->qlen, q); + br_i15_encode(sk->dq, sk->dqlen, t); + break; + } + } + + /* + * If p and q have the same size, then it is possible that q > p + * (when the target modulus size is odd, we generate p with a + * greater bit length than q). If q > p, we want to swap p and q + * (and also dp and dq) for two reasons: + * - The final step below (inversion of q modulo p) is easier if + * p > q. + * - While BearSSL's RSA code is perfectly happy with RSA keys such + * that p < q, some other implementations have restrictions and + * require p > q. + * + * Note that we can do a simple non-constant-time swap here, + * because the only information we leak here is that we insist on + * returning p and q such that p > q, which is not a secret. + */ + if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) { + bufswap(p, q, (1 + plen) * sizeof *p); + bufswap(sk->p, sk->q, sk->plen); + bufswap(sk->dp, sk->dq, sk->dplen); + } + + /* + * We have produced p, q, dp and dq. We can now compute iq = 1/d mod p. + * + * We ensured that p >= q, so this is just a matter of updating the + * header word for q (and possibly adding an extra word). + * + * Theoretically, the call below may fail, in case we were + * extraordinarily unlucky, and p = q. Another failure case is if + * Miller-Rabin failed us _twice_, and p and q are non-prime and + * have a factor is common. We report the error mostly because it + * is cheap and we can, but in practice this never happens (or, at + * least, it happens way less often than hardware glitches). + */ + q[0] = p[0]; + if (plen > qlen) { + q[plen] = 0; + t ++; + tlen --; + } + br_i15_zero(t, p[0]); + t[1] = 1; + r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen); + br_i15_encode(sk->iq, sk->iqlen, t); + + /* + * Compute the public modulus too, if required. + */ + if (pk != NULL) { + br_i15_zero(t, p[0]); + br_i15_mulacc(t, p, q); + br_i15_encode(pk->n, pk->nlen, t); + } + + return r; +} diff --git a/src/rsa/rsa_i31_keygen.c b/src/rsa/rsa_i31_keygen.c new file mode 100644 index 0000000..0e26246 --- /dev/null +++ b/src/rsa/rsa_i31_keygen.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i31_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + return br_rsa_i31_keygen_inner(rng, + sk, kbuf_priv, pk, kbuf_pub, size, pubexp, + &br_i31_modpow_opt); +} diff --git a/src/rsa/rsa_i31_keygen_inner.c b/src/rsa/rsa_i31_keygen_inner.c new file mode 100644 index 0000000..69120e7 --- /dev/null +++ b/src/rsa/rsa_i31_keygen_inner.c @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Make a random integer of the provided size. The size is encoded. + * The header word is untouched. + */ +static void +mkrand(const br_prng_class **rng, uint32_t *x, uint32_t esize) +{ + size_t u, len; + unsigned m; + + len = (esize + 31) >> 5; + (*rng)->generate(rng, x + 1, len * sizeof(uint32_t)); + for (u = 1; u < len; u ++) { + x[u] &= 0x7FFFFFFF; + } + m = esize & 31; + if (m == 0) { + x[len] &= 0x7FFFFFFF; + } else { + x[len] &= 0x7FFFFFFF >> (31 - m); + } +} + +/* + * This is the big-endian unsigned representation of the product of + * all small primes from 13 to 1481. + */ +static const unsigned char SMALL_PRIMES[] = { + 0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A, + 0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7, + 0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37, + 0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5, + 0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E, + 0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6, + 0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C, + 0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40, + 0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50, + 0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7, + 0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3, + 0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E, + 0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC, + 0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08, + 0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B, + 0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22, + 0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77, + 0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E, + 0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80, + 0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8, + 0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2, + 0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC, + 0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54, + 0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74, + 0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C, + 0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD +}; + +/* + * We need temporary values for at least 7 integers of the same size + * as a factor (including header word); more space helps with performance + * (in modular exponentiations), but we much prefer to remain under + * 2 kilobytes in total, to save stack space. The macro TEMPS below + * exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE + * is greater than 4464 (default value is 4096, so the 2-kB limit is + * maintained unless BR_MAX_RSA_SIZE was modified). + */ +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define ROUND2(x) ((((x) + 1) >> 1) << 1) + +#define TEMPS MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31))) + +/* + * Perform trial division on a candidate prime. This computes + * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The + * br_i31_moddiv() function will report an error if y is not invertible + * modulo x. Returned value is 1 on success (none of the small primes + * divides x), 0 on error (a non-trivial GCD is obtained). + * + * This function assumes that x is odd. + */ +static uint32_t +trial_divisions(const uint32_t *x, uint32_t *t) +{ + uint32_t *y; + uint32_t x0i; + + y = t; + t += 1 + ((x[0] + 31) >> 5); + x0i = br_i31_ninv31(x[1]); + br_i31_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x); + return br_i31_moddiv(y, y, x, x0i, t); +} + +/* + * Perform n rounds of Miller-Rabin on the candidate prime x. This + * function assumes that x = 3 mod 4. + * + * Returned value is 1 on success (all rounds completed successfully), + * 0 otherwise. + */ +static uint32_t +miller_rabin(const br_prng_class **rng, const uint32_t *x, int n, + uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31) +{ + /* + * Since x = 3 mod 4, the Miller-Rabin test is simple: + * - get a random base a (such that 1 < a < x-1) + * - compute z = a^((x-1)/2) mod x + * - if z != 1 and z != x-1, the number x is composite + * + * We generate bases 'a' randomly with a size which is + * one bit less than x, which ensures that a < x-1. It + * is not useful to verify that a > 1 because the probability + * that we get a value a equal to 0 or 1 is much smaller + * than the probability of our Miller-Rabin tests not to + * detect a composite, which is already quite smaller than the + * probability of the hardware misbehaving and return a + * composite integer because of some glitch (e.g. bad RAM + * or ill-timed cosmic ray). + */ + unsigned char *xm1d2; + size_t xlen, xm1d2_len, xm1d2_len_u32, u; + uint32_t asize; + unsigned cc; + uint32_t x0i; + + /* + * Compute (x-1)/2 (encoded). + */ + xm1d2 = (unsigned char *)t; + xm1d2_len = ((x[0] - (x[0] >> 5)) + 7) >> 3; + br_i31_encode(xm1d2, xm1d2_len, x); + cc = 0; + for (u = 0; u < xm1d2_len; u ++) { + unsigned w; + + w = xm1d2[u]; + xm1d2[u] = (unsigned char)((w >> 1) | cc); + cc = w << 7; + } + + /* + * We used some words of the provided buffer for (x-1)/2. + */ + xm1d2_len_u32 = (xm1d2_len + 3) >> 2; + t += xm1d2_len_u32; + tlen -= xm1d2_len_u32; + + xlen = (x[0] + 31) >> 5; + asize = x[0] - 1 - EQ0(x[0] & 31); + x0i = br_i31_ninv31(x[1]); + while (n -- > 0) { + uint32_t *a, *t2; + uint32_t eq1, eqm1; + size_t t2len; + + /* + * Generate a random base. We don't need the base to be + * really uniform modulo x, so we just get a random + * number which is one bit shorter than x. + */ + a = t; + a[0] = x[0]; + a[xlen] = 0; + mkrand(rng, a, asize); + + /* + * Compute a^((x-1)/2) mod x. We assume here that the + * function will not fail (the temporary array is large + * enough). + */ + t2 = t + 1 + xlen; + t2len = tlen - 1 - xlen; + if ((t2len & 1) != 0) { + /* + * Since the source array is 64-bit aligned and + * has an even number of elements (TEMPS), we + * can use the parity of the remaining length to + * detect and adjust alignment. + */ + t2 ++; + t2len --; + } + mp31(a, xm1d2, xm1d2_len, x, x0i, t2, t2len); + + /* + * We must obtain either 1 or x-1. Note that x is odd, + * hence x-1 differs from x only in its low word (no + * carry). + */ + eq1 = a[1] ^ 1; + eqm1 = a[1] ^ (x[1] - 1); + for (u = 2; u <= xlen; u ++) { + eq1 |= a[u]; + eqm1 |= a[u] ^ x[u]; + } + + if ((EQ0(eq1) | EQ0(eqm1)) == 0) { + return 0; + } + } + return 1; +} + +/* + * Create a random prime of the provided size. 'size' is the _encoded_ + * bit length. The two top bits and the two bottom bits are set to 1. + */ +static void +mkprime(const br_prng_class **rng, uint32_t *x, uint32_t esize, + uint32_t pubexp, uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31) +{ + size_t len; + + x[0] = esize; + len = (esize + 31) >> 5; + for (;;) { + size_t u; + uint32_t m3, m5, m7, m11; + int rounds, s7, s11; + + /* + * Generate random bits. We force the two top bits and the + * two bottom bits to 1. + */ + mkrand(rng, x, esize); + if ((esize & 31) == 0) { + x[len] |= 0x60000000; + } else if ((esize & 31) == 1) { + x[len] |= 0x00000001; + x[len - 1] |= 0x40000000; + } else { + x[len] |= 0x00000003 << ((esize & 31) - 2); + } + x[1] |= 0x00000003; + + /* + * Trial division with low primes (3, 5, 7 and 11). We + * use the following properties: + * + * 2^2 = 1 mod 3 + * 2^4 = 1 mod 5 + * 2^3 = 1 mod 7 + * 2^10 = 1 mod 11 + */ + m3 = 0; + m5 = 0; + m7 = 0; + m11 = 0; + s7 = 0; + s11 = 0; + for (u = 0; u < len; u ++) { + uint32_t w, w3, w5, w7, w11; + + w = x[1 + u]; + w3 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */ + w5 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */ + w7 = (w & 0x7FFF) + (w >> 15); /* max: 98302 */ + w11 = (w & 0xFFFFF) + (w >> 20); /* max: 1050622 */ + + m3 += w3 << (u & 1); + m3 = (m3 & 0xFF) + (m3 >> 8); /* max: 1025 */ + + m5 += w5 << ((4 - u) & 3); + m5 = (m5 & 0xFFF) + (m5 >> 12); /* max: 4479 */ + + m7 += w7 << s7; + m7 = (m7 & 0x1FF) + (m7 >> 9); /* max: 1280 */ + if (++ s7 == 3) { + s7 = 0; + } + + m11 += w11 << s11; + if (++ s11 == 10) { + s11 = 0; + } + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 526847 */ + } + + m3 = (m3 & 0x3F) + (m3 >> 6); /* max: 78 */ + m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 18 */ + m3 = ((m3 * 43) >> 5) & 3; + + m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 271 */ + m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 31 */ + m5 -= 20 & -GT(m5, 19); + m5 -= 10 & -GT(m5, 9); + m5 -= 5 & -GT(m5, 4); + + m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 82 */ + m7 = (m7 & 0x07) + (m7 >> 3); /* max: 16 */ + m7 = ((m7 * 147) >> 7) & 7; + + /* + * 2^5 = 32 = -1 mod 11. + */ + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1536 */ + m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1023 */ + m11 = (m11 & 0x1F) + 33 - (m11 >> 5); /* max: 64 */ + m11 -= 44 & -GT(m11, 43); + m11 -= 22 & -GT(m11, 21); + m11 -= 11 & -GT(m11, 10); + + /* + * If any of these modulo is 0, then the candidate is + * not prime. Also, if pubexp is 3, 5, 7 or 11, and the + * corresponding modulus is 1, then the candidate must + * be rejected, because we need e to be invertible + * modulo p-1. We can use simple comparisons here + * because they won't leak information on a candidate + * that we keep, only on one that we reject (and is thus + * not secret). + */ + if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) { + continue; + } + if ((pubexp == 3 && m3 == 1) + || (pubexp == 5 && m5 == 5) + || (pubexp == 7 && m5 == 7) + || (pubexp == 11 && m5 == 11)) + { + continue; + } + + /* + * More trial divisions. + */ + if (!trial_divisions(x, t)) { + continue; + } + + /* + * Miller-Rabin algorithm. Since we selected a random + * integer, not a maliciously crafted integer, we can use + * relatively few rounds to lower the risk of a false + * positive (i.e. declaring prime a non-prime) under + * 2^(-80). It is not useful to lower the probability much + * below that, since that would be substantially below + * the probability of the hardware misbehaving. Sufficient + * numbers of rounds are extracted from the Handbook of + * Applied Cryptography, note 4.49 (page 149). + * + * Since we work on the encoded size (esize), we need to + * compare with encoded thresholds. + */ + if (esize < 309) { + rounds = 12; + } else if (esize < 464) { + rounds = 9; + } else if (esize < 670) { + rounds = 6; + } else if (esize < 877) { + rounds = 4; + } else if (esize < 1341) { + rounds = 3; + } else { + rounds = 2; + } + + if (miller_rabin(rng, x, rounds, t, tlen, mp31)) { + return; + } + } +} + +/* + * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided + * as parameter (with announced bit length equal to that of p). This + * function computes d = 1/e mod p-1 (for an odd integer e). Returned + * value is 1 on success, 0 on error (an error is reported if e is not + * invertible modulo p-1). + * + * The temporary buffer (t) must have room for at least 4 integers of + * the size of p. + */ +static uint32_t +invert_pubexp(uint32_t *d, const uint32_t *m, uint32_t e, uint32_t *t) +{ + uint32_t *f; + uint32_t r; + + f = t; + t += 1 + ((m[0] + 31) >> 5); + + /* + * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd. + */ + br_i31_zero(d, m[0]); + d[1] = 1; + br_i31_zero(f, m[0]); + f[1] = e & 0x7FFFFFFF; + f[2] = e >> 31; + r = br_i31_moddiv(d, f, m, br_i31_ninv31(m[1]), t); + + /* + * We really want d = 1/e mod p-1, with p = 2m. By the CRT, + * the result is either the d we got, or d + m. + * + * Let's write e*d = 1 + k*m, for some integer k. Integers e + * and m are odd. If d is odd, then e*d is odd, which implies + * that k must be even; in that case, e*d = 1 + (k/2)*2m, and + * thus d is already fine. Conversely, if d is even, then k + * is odd, and we must add m to d in order to get the correct + * result. + */ + br_i31_add(d, m, (uint32_t)(1 - (d[1] & 1))); + + return r; +} + +/* + * Swap two buffers in RAM. They must be disjoint. + */ +static void +bufswap(void *b1, void *b2, size_t len) +{ + size_t u; + unsigned char *buf1, *buf2; + + buf1 = b1; + buf2 = b2; + for (u = 0; u < len; u ++) { + unsigned w; + + w = buf1[u]; + buf1[u] = buf2[u]; + buf2[u] = w; + } +} + +/* see inner.h */ +uint32_t +br_rsa_i31_keygen_inner(const br_prng_class **rng, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31) +{ + uint32_t esize_p, esize_q; + size_t plen, qlen, tlen; + uint32_t *p, *q, *t; + union { + uint32_t t32[TEMPS]; + uint64_t t64[TEMPS >> 1]; /* for 64-bit alignment */ + } tmp; + uint32_t r; + + if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) { + return 0; + } + if (pubexp == 0) { + pubexp = 3; + } else if (pubexp == 1 || (pubexp & 1) == 0) { + return 0; + } + + esize_p = (size + 1) >> 1; + esize_q = size - esize_p; + sk->n_bitlen = size; + sk->p = kbuf_priv; + sk->plen = (esize_p + 7) >> 3; + sk->q = sk->p + sk->plen; + sk->qlen = (esize_q + 7) >> 3; + sk->dp = sk->q + sk->qlen; + sk->dplen = sk->plen; + sk->dq = sk->dp + sk->dplen; + sk->dqlen = sk->qlen; + sk->iq = sk->dq + sk->dqlen; + sk->iqlen = sk->plen; + + if (pk != NULL) { + pk->n = kbuf_pub; + pk->nlen = (size + 7) >> 3; + pk->e = pk->n + pk->nlen; + pk->elen = 4; + br_enc32be(pk->e, pubexp); + while (*pk->e == 0) { + pk->e ++; + pk->elen --; + } + } + + /* + * We now switch to encoded sizes. + * + * floor((x * 16913) / (2^19)) is equal to floor(x/31) for all + * integers x from 0 to 34966; the intermediate product fits on + * 30 bits, thus we can use MUL31(). + */ + esize_p += MUL31(esize_p, 16913) >> 19; + esize_q += MUL31(esize_q, 16913) >> 19; + plen = (esize_p + 31) >> 5; + qlen = (esize_q + 31) >> 5; + p = tmp.t32; + q = p + 1 + plen; + t = q + 1 + qlen; + tlen = ((sizeof tmp.t32) / sizeof(uint32_t)) - (2 + plen + qlen); + + /* + * When looking for primes p and q, we temporarily divide + * candidates by 2, in order to compute the inverse of the + * public exponent. + */ + + for (;;) { + mkprime(rng, p, esize_p, pubexp, t, tlen, mp31); + br_i31_rshift(p, 1); + if (invert_pubexp(t, p, pubexp, t + 1 + plen)) { + br_i31_add(p, p, 1); + p[1] |= 1; + br_i31_encode(sk->p, sk->plen, p); + br_i31_encode(sk->dp, sk->dplen, t); + break; + } + } + + for (;;) { + mkprime(rng, q, esize_q, pubexp, t, tlen, mp31); + br_i31_rshift(q, 1); + if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) { + br_i31_add(q, q, 1); + q[1] |= 1; + br_i31_encode(sk->q, sk->qlen, q); + br_i31_encode(sk->dq, sk->dqlen, t); + break; + } + } + + /* + * If p and q have the same size, then it is possible that q > p + * (when the target modulus size is odd, we generate p with a + * greater bit length than q). If q > p, we want to swap p and q + * (and also dp and dq) for two reasons: + * - The final step below (inversion of q modulo p) is easier if + * p > q. + * - While BearSSL's RSA code is perfectly happy with RSA keys such + * that p < q, some other implementations have restrictions and + * require p > q. + * + * Note that we can do a simple non-constant-time swap here, + * because the only information we leak here is that we insist on + * returning p and q such that p > q, which is not a secret. + */ + if (esize_p == esize_q && br_i31_sub(p, q, 0) == 1) { + bufswap(p, q, (1 + plen) * sizeof *p); + bufswap(sk->p, sk->q, sk->plen); + bufswap(sk->dp, sk->dq, sk->dplen); + } + + /* + * We have produced p, q, dp and dq. We can now compute iq = 1/d mod p. + * + * We ensured that p >= q, so this is just a matter of updating the + * header word for q (and possibly adding an extra word). + * + * Theoretically, the call below may fail, in case we were + * extraordinarily unlucky, and p = q. Another failure case is if + * Miller-Rabin failed us _twice_, and p and q are non-prime and + * have a factor is common. We report the error mostly because it + * is cheap and we can, but in practice this never happens (or, at + * least, it happens way less often than hardware glitches). + */ + q[0] = p[0]; + if (plen > qlen) { + q[plen] = 0; + t ++; + tlen --; + } + br_i31_zero(t, p[0]); + t[1] = 1; + r = br_i31_moddiv(t, q, p, br_i31_ninv31(p[1]), t + 1 + plen); + br_i31_encode(sk->iq, sk->iqlen, t); + + /* + * Compute the public modulus too, if required. + */ + if (pk != NULL) { + br_i31_zero(t, p[0]); + br_i31_mulacc(t, p, q); + br_i31_encode(pk->n, pk->nlen, t); + } + + return r; +} diff --git a/src/rsa/rsa_i62_keygen.c b/src/rsa/rsa_i62_keygen.c new file mode 100644 index 0000000..7d60af9 --- /dev/null +++ b/src/rsa/rsa_i62_keygen.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#if BR_INT128 || BR_UMUL128 + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i62_keygen(const br_prng_class **rng, + br_rsa_private_key *sk, unsigned char *kbuf_priv, + br_rsa_public_key *pk, unsigned char *kbuf_pub, + unsigned size, uint32_t pubexp) +{ + return br_rsa_i31_keygen_inner(rng, + sk, kbuf_priv, pk, kbuf_pub, size, pubexp, + &br_i62_modpow_opt_as_i31); +} + +/* see bearssl_rsa.h */ +br_rsa_keygen +br_rsa_i62_keygen_get() +{ + return &br_rsa_i62_keygen; +} + +#else + +/* see bearssl_rsa.h */ +br_rsa_keygen +br_rsa_i62_keygen_get() +{ + return 0; +} + +#endif diff --git a/test/test_crypto.c b/test/test_crypto.c index acefde1..acfabd0 100644 --- a/test/test_crypto.c +++ b/test/test_crypto.c @@ -5685,6 +5685,122 @@ test_RSA_OAEP(const char *name, fflush(stdout); } +static void +test_RSA_keygen(const char *name, br_rsa_keygen kg, + br_rsa_pkcs1_sign sign, br_rsa_pkcs1_vrfy vrfy) +{ + br_hmac_drbg_context rng; + int i; + + printf("Test %s: ", name); + fflush(stdout); + + br_hmac_drbg_init(&rng, &br_sha256_vtable, "seed for RSA keygen", 19); + + for (i = 0; i < 40; i ++) { + unsigned size; + uint32_t pubexp; + br_rsa_private_key sk; + br_rsa_public_key pk; + unsigned char kbuf_priv[BR_RSA_KBUF_PRIV_SIZE(2048)]; + unsigned char kbuf_pub[BR_RSA_KBUF_PUB_SIZE(2048)]; + uint32_t mod[256]; + uint32_t cc; + size_t u, v; + unsigned char sig[257], hv[32], hv2[sizeof hv]; + unsigned mask1, mask2; + + if (i <= 35) { + size = 1024 + i; + pubexp = 17; + } else { + size = 2048; + pubexp = (i << 1) - 69; + } + + if (!kg(&rng.vtable, + &sk, kbuf_priv, &pk, kbuf_pub, size, pubexp)) + { + fprintf(stderr, "RSA key pair generation failure\n"); + exit(EXIT_FAILURE); + } + + for (u = pk.elen; u > 0; u --) { + if (pk.e[u - 1] != (pubexp & 0xFF)) { + fprintf(stderr, "wrong public exponent\n"); + exit(EXIT_FAILURE); + } + pubexp >>= 8; + } + if (pubexp != 0) { + fprintf(stderr, "truncated public exponent\n"); + exit(EXIT_FAILURE); + } + + memset(mod, 0, sizeof mod); + for (u = 0; u < sk.plen; u ++) { + for (v = 0; v < sk.qlen; v ++) { + mod[u + v] += (uint32_t)sk.p[sk.plen - 1 - u] + * (uint32_t)sk.q[sk.qlen - 1 - v]; + } + } + cc = 0; + for (u = 0; u < sk.plen + sk.qlen; u ++) { + mod[u] += cc; + cc = mod[u] >> 8; + mod[u] &= 0xFF; + } + for (u = 0; u < pk.nlen; u ++) { + if (mod[pk.nlen - 1 - u] != pk.n[u]) { + fprintf(stderr, "wrong modulus\n"); + exit(EXIT_FAILURE); + } + } + if (sk.n_bitlen != size) { + fprintf(stderr, "wrong key size\n"); + exit(EXIT_FAILURE); + } + if (pk.nlen != (size + 7) >> 3) { + fprintf(stderr, "wrong modulus size (bytes)\n"); + exit(EXIT_FAILURE); + } + mask1 = 0x01 << ((size + 7) & 7); + mask2 = 0xFF & -mask1; + if ((pk.n[0] & mask2) != mask1) { + fprintf(stderr, "wrong modulus size (bits)\n"); + exit(EXIT_FAILURE); + } + + rng.vtable->generate(&rng.vtable, hv, sizeof hv); + memset(sig, 0, sizeof sig); + sig[pk.nlen] = 0x00; + if (!sign(BR_HASH_OID_SHA256, hv, sizeof hv, &sk, sig)) { + fprintf(stderr, "signature error\n"); + exit(EXIT_FAILURE); + } + if (sig[pk.nlen] != 0x00) { + fprintf(stderr, "signature length error\n"); + exit(EXIT_FAILURE); + } + if (!vrfy(sig, pk.nlen, BR_HASH_OID_SHA256, sizeof hv, + &pk, hv2)) + { + fprintf(stderr, "signature verification error (1)\n"); + exit(EXIT_FAILURE); + } + if (memcmp(hv, hv2, sizeof hv) != 0) { + fprintf(stderr, "signature verification error (2)\n"); + exit(EXIT_FAILURE); + } + + printf("."); + fflush(stdout); + } + + printf(" done.\n"); + fflush(stdout); +} + static void test_RSA_i15(void) { @@ -5693,6 +5809,8 @@ test_RSA_i15(void) &br_rsa_i15_pkcs1_sign, &br_rsa_i15_pkcs1_vrfy); test_RSA_OAEP("RSA i15 OAEP", &br_rsa_i15_oaep_encrypt, &br_rsa_i15_oaep_decrypt); + test_RSA_keygen("RSA i15 keygen", &br_rsa_i15_keygen, + &br_rsa_i15_pkcs1_sign, &br_rsa_i15_pkcs1_vrfy); } static void @@ -5703,6 +5821,8 @@ test_RSA_i31(void) &br_rsa_i31_pkcs1_sign, &br_rsa_i31_pkcs1_vrfy); test_RSA_OAEP("RSA i31 OAEP", &br_rsa_i31_oaep_encrypt, &br_rsa_i31_oaep_decrypt); + test_RSA_keygen("RSA i31 keygen", &br_rsa_i31_keygen, + &br_rsa_i31_pkcs1_sign, &br_rsa_i31_pkcs1_vrfy); } static void @@ -5724,6 +5844,7 @@ test_RSA_i62(void) br_rsa_pkcs1_vrfy vrfy; br_rsa_oaep_encrypt menc; br_rsa_oaep_decrypt mdec; + br_rsa_keygen kgen; pub = br_rsa_i62_public_get(); priv = br_rsa_i62_private_get(); @@ -5731,16 +5852,18 @@ test_RSA_i62(void) vrfy = br_rsa_i62_pkcs1_vrfy_get(); menc = br_rsa_i62_oaep_encrypt_get(); mdec = br_rsa_i62_oaep_decrypt_get(); + kgen = br_rsa_i62_keygen_get(); if (pub) { - if (!priv || !sign || !vrfy || !menc || !mdec) { + if (!priv || !sign || !vrfy || !menc || !mdec || !kgen) { fprintf(stderr, "Inconsistent i62 availability\n"); exit(EXIT_FAILURE); } test_RSA_core("RSA i62 core", pub, priv); test_RSA_sign("RSA i62 sign", priv, sign, vrfy); test_RSA_OAEP("RSA i62 OAEP", menc, mdec); + test_RSA_keygen("RSA i62 keygen", kgen, sign, vrfy); } else { - if (priv || sign || vrfy || menc || mdec) { + if (priv || sign || vrfy || menc || mdec || kgen) { fprintf(stderr, "Inconsistent i62 availability\n"); exit(EXIT_FAILURE); } diff --git a/test/test_speed.c b/test/test_speed.c index 3ea9b99..43d062a 100644 --- a/test/test_speed.c +++ b/test/test_speed.c @@ -679,11 +679,12 @@ static const br_rsa_private_key RSA_SK = { static void test_speed_rsa_inner(char *name, - br_rsa_public fpub, br_rsa_private fpriv) + br_rsa_public fpub, br_rsa_private fpriv, br_rsa_keygen kgen) { unsigned char tmp[sizeof RSA_N]; int i; long num; + br_hmac_drbg_context rng; memset(tmp, 'R', sizeof tmp); tmp[0] = 0; @@ -737,27 +738,82 @@ test_speed_rsa_inner(char *name, } num <<= 1; } + + if (kgen == 0) { + printf("%-30s KEYGEN UNAVAILABLE\n", name); + fflush(stdout); + return; + } + br_hmac_drbg_init(&rng, &br_sha256_vtable, "RSA keygen seed", 15); + + num = 10; + for (;;) { + clock_t begin, end; + double tt; + long k; + + begin = clock(); + for (k = num; k > 0; k --) { + br_rsa_private_key sk; + unsigned char kbuf[BR_RSA_KBUF_PRIV_SIZE(1024)]; + + kgen(&rng.vtable, &sk, kbuf, NULL, NULL, 1024, 0); + } + end = clock(); + tt = (double)(end - begin) / CLOCKS_PER_SEC; + if (tt >= 10.0) { + printf("%-30s %8.2f kgen[1024]/s\n", name, + (double)num / tt); + fflush(stdout); + break; + } + num <<= 1; + } + + num = 10; + for (;;) { + clock_t begin, end; + double tt; + long k; + + begin = clock(); + for (k = num; k > 0; k --) { + br_rsa_private_key sk; + unsigned char kbuf[BR_RSA_KBUF_PRIV_SIZE(2048)]; + + kgen(&rng.vtable, &sk, kbuf, NULL, NULL, 2048, 0); + } + end = clock(); + tt = (double)(end - begin) / CLOCKS_PER_SEC; + if (tt >= 10.0) { + printf("%-30s %8.2f kgen[2048]/s\n", name, + (double)num / tt); + fflush(stdout); + break; + } + num <<= 1; + } } static void test_speed_rsa_i15(void) { test_speed_rsa_inner("RSA i15", - &br_rsa_i15_public, &br_rsa_i15_private); + &br_rsa_i15_public, &br_rsa_i15_private, &br_rsa_i15_keygen); } static void test_speed_rsa_i31(void) { test_speed_rsa_inner("RSA i31", - &br_rsa_i31_public, &br_rsa_i31_private); + &br_rsa_i31_public, &br_rsa_i31_private, &br_rsa_i31_keygen); } static void test_speed_rsa_i32(void) { test_speed_rsa_inner("RSA i32", - &br_rsa_i32_public, &br_rsa_i32_private); + &br_rsa_i32_public, &br_rsa_i32_private, 0); } static void @@ -765,11 +821,13 @@ test_speed_rsa_i62(void) { br_rsa_public pub; br_rsa_private priv; + br_rsa_keygen kgen; pub = br_rsa_i62_public_get(); priv = br_rsa_i62_private_get(); + kgen = br_rsa_i62_keygen_get(); if (pub) { - test_speed_rsa_inner("RSA i62", pub, priv); + test_speed_rsa_inner("RSA i62", pub, priv, kgen); } else { printf("%-30s UNAVAILABLE\n", "RSA i62"); } diff --git a/tools/skey.c b/tools/skey.c index a9ecb32..0f672bf 100644 --- a/tools/skey.c +++ b/tools/skey.c @@ -105,6 +105,90 @@ print_ec(const br_ec_private_key *sk, int print_text, int print_C) } } +static int +parse_rsa_spec(const char *kgen_spec, unsigned *size, uint32_t *pubexp) +{ + const char *p; + char *end; + unsigned long ul; + + p = kgen_spec; + if (*p != 'r' && *p != 'R') { + return 0; + } + p ++; + if (*p != 's' && *p != 'S') { + return 0; + } + p ++; + if (*p != 'a' && *p != 'A') { + return 0; + } + p ++; + if (*p == 0) { + *size = 2048; + *pubexp = 3; + return 1; + } else if (*p != ':') { + return 0; + } + p ++; + ul = strtoul(p, &end, 10); + if (ul < 512 || ul > 32768) { + return 0; + } + *size = ul; + p = end; + if (*p == 0) { + *pubexp = 3; + return 1; + } else if (*p != ':') { + return 0; + } + p ++; + ul = strtoul(p, &end, 10); + if ((ul & 1) == 0 || ul == 1 || ((ul >> 30) >> 2) != 0) { + return 0; + } + *pubexp = ul; + if (*end != 0) { + return 0; + } + return 1; +} + +static int +keygen_rsa(unsigned size, uint32_t pubexp, int print_text, int print_C) +{ + br_hmac_drbg_context rng; + br_prng_seeder seeder; + br_rsa_keygen kg; + br_rsa_private_key sk; + unsigned char *kbuf_priv; + uint32_t r; + + seeder = br_prng_seeder_system(NULL); + if (seeder == 0) { + fprintf(stderr, "ERROR: no system source of randomness\n"); + return 0; + } + br_hmac_drbg_init(&rng, &br_sha256_vtable, NULL, 0); + if (!seeder(&rng.vtable)) { + fprintf(stderr, "ERROR: system source of randomness failed\n"); + return 0; + } + kbuf_priv = xmalloc(BR_RSA_KBUF_PRIV_SIZE(size)); + kg = br_rsa_keygen_get_default(); + r = kg(&rng.vtable, &sk, kbuf_priv, NULL, NULL, size, pubexp); + if (!r) { + fprintf(stderr, "ERROR: RSA key pair generation failed\n"); + } else { + print_rsa(&sk, print_text, print_C); + } + xfree(kbuf_priv); + return r; +} + static int decode_key(const unsigned char *buf, size_t len, int print_text, int print_C) { @@ -165,6 +249,14 @@ usage_skey(void) " -text print public key details (human-readable)\n"); fprintf(stderr, " -C print public key details (C code)\n"); + fprintf(stderr, +" -gen spec generate a new key using the provided key specification\n"); + fprintf(stderr, +"Key specification begins with a key type, followed by optional parameters\n"); + fprintf(stderr, +"that depend on the key type, separated by colon characters:\n"); + fprintf(stderr, +" rsa[:size[:pubexep]] RSA key (defaults: size = 2048, pubexp = 3)\n"); } /* see brssl.h */ @@ -178,6 +270,7 @@ do_skey(int argc, char *argv[]) unsigned char *buf; size_t len; pem_object *pos; + const char *kgen_spec; retcode = 0; verbose = 1; @@ -186,6 +279,7 @@ do_skey(int argc, char *argv[]) num_files = 0; buf = NULL; pos = NULL; + kgen_spec = NULL; for (i = 0; i < argc; i ++) { const char *arg; @@ -203,13 +297,48 @@ do_skey(int argc, char *argv[]) print_text = 1; } else if (eqstr(arg, "-C")) { print_C = 1; + } else if (eqstr(arg, "-gen")) { + if (++ i >= argc) { + fprintf(stderr, + "ERROR: no argument for '-gen'\n"); + usage_skey(); + goto skey_exit_error; + } + if (kgen_spec != NULL) { + fprintf(stderr, + "ERROR: multiple '-gen' options\n"); + usage_skey(); + goto skey_exit_error; + } + kgen_spec = argv[i]; + argv[i] = NULL; } else { fprintf(stderr, "ERROR: unknown option: '%s'\n", arg); usage_skey(); goto skey_exit_error; } } - if (num_files == 0) { + if (kgen_spec != NULL) { + unsigned rsa_size; + uint32_t rsa_pubexp; + + if (num_files != 0) { + fprintf(stderr, + "ERROR: key files provided while generating\n"); + usage_skey(); + goto skey_exit_error; + } + + if (parse_rsa_spec(kgen_spec, &rsa_size, &rsa_pubexp)) { + keygen_rsa(rsa_size, rsa_pubexp, print_text, print_C); + } else { + fprintf(stderr, + "ERROR: unknown key specification: '%s'\n", + kgen_spec); + usage_skey(); + goto skey_exit_error; + } + } else if (num_files == 0) { fprintf(stderr, "ERROR: no private key provided\n"); usage_skey(); goto skey_exit_error;