From: Thomas Pornin Date: Wed, 4 Jan 2017 18:21:09 +0000 (+0100) Subject: New "i15" implementation of big integers (faster, and constant-time, on ARM Cortex... X-Git-Tag: v0.4~27 X-Git-Url: https://bearssl.org/gitweb//home/git/?a=commitdiff_plain;h=28e4e120b84dacdf53963639f1a8a6fec2793662;p=BearSSL New "i15" implementation of big integers (faster, and constant-time, on ARM Cortex M0/M0+); imported into EC, ECDSA, RSA. --- diff --git a/Makefile b/Makefile index 8c63fb2..32c5d7b 100644 --- a/Makefile +++ b/Makefile @@ -47,17 +47,19 @@ TESTX509 = testx509 TESTMATH = testmath OBJCODEC = $(BUILD)/ccopy.o $(BUILD)/dec16be.o $(BUILD)/dec16le.o $(BUILD)/dec32be.o $(BUILD)/dec32le.o $(BUILD)/dec64be.o $(BUILD)/dec64le.o $(BUILD)/enc16be.o $(BUILD)/enc16le.o $(BUILD)/enc32be.o $(BUILD)/enc32le.o $(BUILD)/enc64be.o $(BUILD)/enc64le.o $(BUILD)/pemdec.o -OBJEC = $(BUILD)/ec_p256_i15.o $(BUILD)/ec_prime_i31.o $(BUILD)/ec_prime_i31_secp256r1.o $(BUILD)/ec_prime_i31_secp384r1.o $(BUILD)/ec_prime_i31_secp521r1.o $(BUILD)/ec_secp256r1.o $(BUILD)/ec_secp384r1.o $(BUILD)/ec_secp521r1.o $(BUILD)/ecdsa_atr.o $(BUILD)/ecdsa_i31_bits.o $(BUILD)/ecdsa_i31_sign_asn1.o $(BUILD)/ecdsa_i31_sign_raw.o $(BUILD)/ecdsa_i31_vrfy_asn1.o $(BUILD)/ecdsa_i31_vrfy_raw.o $(BUILD)/ecdsa_rta.o +OBJEC = $(BUILD)/ec_p256_i15.o $(BUILD)/ec_prime_i15.o $(BUILD)/ec_prime_i31.o $(BUILD)/ec_secp256r1.o $(BUILD)/ec_secp384r1.o $(BUILD)/ec_secp521r1.o $(BUILD)/ecdsa_atr.o $(BUILD)/ecdsa_i15_bits.o $(BUILD)/ecdsa_i15_sign_asn1.o $(BUILD)/ecdsa_i15_sign_raw.o $(BUILD)/ecdsa_i15_vrfy_asn1.o $(BUILD)/ecdsa_i15_vrfy_raw.o $(BUILD)/ecdsa_i31_bits.o $(BUILD)/ecdsa_i31_sign_asn1.o $(BUILD)/ecdsa_i31_sign_raw.o $(BUILD)/ecdsa_i31_vrfy_asn1.o $(BUILD)/ecdsa_i31_vrfy_raw.o $(BUILD)/ecdsa_rta.o +# $(BUILD)/ec_prime_i31_secp256r1.o $(BUILD)/ec_prime_i31_secp384r1.o $(BUILD)/ec_prime_i31_secp521r1.o OBJHASH = $(BUILD)/dig_oid.o $(BUILD)/dig_size.o $(BUILD)/ghash_ctmul.o $(BUILD)/ghash_ctmul32.o $(BUILD)/ghash_ctmul64.o $(BUILD)/md5.o $(BUILD)/md5sha1.o $(BUILD)/multihash.o $(BUILD)/sha1.o $(BUILD)/sha2big.o $(BUILD)/sha2small.o +OBJINT15 = $(BUILD)/i15_core.o $(BUILD)/i15_ext1.o $(BUILD)/i15_ext2.o OBJINT31 = $(BUILD)/i31_add.o $(BUILD)/i31_bitlen.o $(BUILD)/i31_decmod.o $(BUILD)/i31_decode.o $(BUILD)/i31_decred.o $(BUILD)/i31_encode.o $(BUILD)/i31_fmont.o $(BUILD)/i31_iszero.o $(BUILD)/i31_modpow.o $(BUILD)/i31_montmul.o $(BUILD)/i31_mulacc.o $(BUILD)/i31_muladd.o $(BUILD)/i31_ninv31.o $(BUILD)/i31_reduce.o $(BUILD)/i31_rshift.o $(BUILD)/i31_sub.o $(BUILD)/i31_tmont.o OBJINT32 = $(BUILD)/i32_add.o $(BUILD)/i32_bitlen.o $(BUILD)/i32_decmod.o $(BUILD)/i32_decode.o $(BUILD)/i32_decred.o $(BUILD)/i32_div32.o $(BUILD)/i32_encode.o $(BUILD)/i32_fmont.o $(BUILD)/i32_iszero.o $(BUILD)/i32_modpow.o $(BUILD)/i32_montmul.o $(BUILD)/i32_mulacc.o $(BUILD)/i32_muladd.o $(BUILD)/i32_ninv32.o $(BUILD)/i32_reduce.o $(BUILD)/i32_sub.o $(BUILD)/i32_tmont.o OBJMAC = $(BUILD)/hmac.o $(BUILD)/hmac_ct.o OBJRAND = $(BUILD)/hmac_drbg.o -OBJRSA = $(BUILD)/rsa_i31_pkcs1_sign.o $(BUILD)/rsa_i31_pkcs1_vrfy.o $(BUILD)/rsa_i31_priv.o $(BUILD)/rsa_i31_pub.o $(BUILD)/rsa_i32_pkcs1_sign.o $(BUILD)/rsa_i32_pkcs1_vrfy.o $(BUILD)/rsa_i32_priv.o $(BUILD)/rsa_i32_pub.o $(BUILD)/rsa_ssl_decrypt.o +OBJRSA = $(BUILD)/rsa_i15_pkcs1_sign.o $(BUILD)/rsa_i15_pkcs1_vrfy.o $(BUILD)/rsa_i15_priv.o $(BUILD)/rsa_i15_pub.o $(BUILD)/rsa_i31_pkcs1_sign.o $(BUILD)/rsa_i31_pkcs1_vrfy.o $(BUILD)/rsa_i31_priv.o $(BUILD)/rsa_i31_pub.o $(BUILD)/rsa_i32_pkcs1_sign.o $(BUILD)/rsa_i32_pkcs1_vrfy.o $(BUILD)/rsa_i32_priv.o $(BUILD)/rsa_i32_pub.o $(BUILD)/rsa_pkcs1_sig_pad.o $(BUILD)/rsa_pkcs1_sig_unpad.o $(BUILD)/rsa_ssl_decrypt.o OBJSSL = $(BUILD)/prf.o $(BUILD)/prf_md5sha1.o $(BUILD)/prf_sha256.o $(BUILD)/prf_sha384.o $(BUILD)/ssl_ccert_single_ec.o $(BUILD)/ssl_ccert_single_rsa.o $(BUILD)/ssl_client.o $(BUILD)/ssl_client_full.o $(BUILD)/ssl_engine.o $(BUILD)/ssl_hashes.o $(BUILD)/ssl_hs_client.o $(BUILD)/ssl_hs_server.o $(BUILD)/ssl_io.o $(BUILD)/ssl_lru.o $(BUILD)/ssl_rec_cbc.o $(BUILD)/ssl_rec_chapol.o $(BUILD)/ssl_rec_gcm.o $(BUILD)/ssl_server.o $(BUILD)/ssl_server_mine2c.o $(BUILD)/ssl_server_mine2g.o $(BUILD)/ssl_server_minf2c.o $(BUILD)/ssl_server_minf2g.o $(BUILD)/ssl_server_minr2g.o $(BUILD)/ssl_server_minu2g.o $(BUILD)/ssl_server_minv2g.o $(BUILD)/ssl_server_full_ec.o $(BUILD)/ssl_server_full_rsa.o $(BUILD)/ssl_scert_single_ec.o $(BUILD)/ssl_scert_single_rsa.o OBJSYMCIPHER = $(BUILD)/aes_big_cbcdec.o $(BUILD)/aes_big_cbcenc.o $(BUILD)/aes_big_ctr.o $(BUILD)/aes_big_dec.o $(BUILD)/aes_big_enc.o $(BUILD)/aes_common.o $(BUILD)/aes_ct.o $(BUILD)/aes_ct64.o $(BUILD)/aes_ct64_cbcdec.o $(BUILD)/aes_ct64_cbcenc.o $(BUILD)/aes_ct64_ctr.o $(BUILD)/aes_ct64_dec.o $(BUILD)/aes_ct64_enc.o $(BUILD)/aes_ct_cbcdec.o $(BUILD)/aes_ct_cbcenc.o $(BUILD)/aes_ct_ctr.o $(BUILD)/aes_ct_dec.o $(BUILD)/aes_ct_enc.o $(BUILD)/aes_small_cbcdec.o $(BUILD)/aes_small_cbcenc.o $(BUILD)/aes_small_ctr.o $(BUILD)/aes_small_dec.o $(BUILD)/aes_small_enc.o $(BUILD)/chacha20_ct.o $(BUILD)/des_ct.o $(BUILD)/des_ct_cbcdec.o $(BUILD)/des_ct_cbcenc.o $(BUILD)/des_support.o $(BUILD)/des_tab.o $(BUILD)/des_tab_cbcdec.o $(BUILD)/des_tab_cbcenc.o $(BUILD)/poly1305_ctmul.o OBJX509 = $(BUILD)/skey_decoder.o $(BUILD)/x509_decoder.o $(BUILD)/x509_knownkey.o $(BUILD)/x509_minimal.o $(BUILD)/x509_minimal_full.o -OBJ = $(OBJCODEC) $(OBJEC) $(OBJHASH) $(OBJINT31) $(OBJINT32) $(OBJMAC) $(OBJRAND) $(OBJRSA) $(OBJSSL) $(OBJSYMCIPHER) $(OBJX509) +OBJ = $(OBJCODEC) $(OBJEC) $(OBJHASH) $(OBJINT15) $(OBJINT31) $(OBJINT32) $(OBJMAC) $(OBJRAND) $(OBJRSA) $(OBJSSL) $(OBJSYMCIPHER) $(OBJX509) OBJBRSSL = $(BUILD)/brssl.o $(BUILD)/certs.o $(BUILD)/chain.o $(BUILD)/client.o $(BUILD)/errors.o $(BUILD)/files.o $(BUILD)/keys.o $(BUILD)/names.o $(BUILD)/server.o $(BUILD)/skey.o $(BUILD)/sslio.o $(BUILD)/ta.o $(BUILD)/vector.o $(BUILD)/verify.o $(BUILD)/xmem.o OBJTESTCRYPTO = $(BUILD)/test_crypto.o OBJTESTSPEED = $(BUILD)/test_speed.o @@ -163,6 +165,9 @@ $(BUILD)/ec_g_secp521r1.o: src/ec/ec_g_secp521r1.c $(HEADERS) $(BUILD)/ec_p256_i15.o: src/ec/ec_p256_i15.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/ec_p256_i15.o src/ec/ec_p256_i15.c +$(BUILD)/ec_prime_i15.o: src/ec/ec_prime_i15.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ec_prime_i15.o src/ec/ec_prime_i15.c + $(BUILD)/ec_prime_i31.o: src/ec/ec_prime_i31.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/ec_prime_i31.o src/ec/ec_prime_i31.c @@ -187,6 +192,21 @@ $(BUILD)/ec_secp521r1.o: src/ec/ec_secp521r1.c $(HEADERS) $(BUILD)/ecdsa_atr.o: src/ec/ecdsa_atr.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_atr.o src/ec/ecdsa_atr.c +$(BUILD)/ecdsa_i15_bits.o: src/ec/ecdsa_i15_bits.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_bits.o src/ec/ecdsa_i15_bits.c + +$(BUILD)/ecdsa_i15_sign_asn1.o: src/ec/ecdsa_i15_sign_asn1.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_sign_asn1.o src/ec/ecdsa_i15_sign_asn1.c + +$(BUILD)/ecdsa_i15_sign_raw.o: src/ec/ecdsa_i15_sign_raw.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_sign_raw.o src/ec/ecdsa_i15_sign_raw.c + +$(BUILD)/ecdsa_i15_vrfy_asn1.o: src/ec/ecdsa_i15_vrfy_asn1.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_vrfy_asn1.o src/ec/ecdsa_i15_vrfy_asn1.c + +$(BUILD)/ecdsa_i15_vrfy_raw.o: src/ec/ecdsa_i15_vrfy_raw.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_vrfy_raw.o src/ec/ecdsa_i15_vrfy_raw.c + $(BUILD)/ecdsa_i31_bits.o: src/ec/ecdsa_i31_bits.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i31_bits.o src/ec/ecdsa_i31_bits.c @@ -238,6 +258,15 @@ $(BUILD)/sha2big.o: src/hash/sha2big.c $(HEADERS) $(BUILD)/sha2small.o: src/hash/sha2small.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/sha2small.o src/hash/sha2small.c +$(BUILD)/i15_core.o: src/int/i15_core.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/i15_core.o src/int/i15_core.c + +$(BUILD)/i15_ext1.o: src/int/i15_ext1.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/i15_ext1.o src/int/i15_ext1.c + +$(BUILD)/i15_ext2.o: src/int/i15_ext2.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/i15_ext2.o src/int/i15_ext2.c + $(BUILD)/i31_add.o: src/int/i31_add.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/i31_add.o src/int/i31_add.c @@ -349,6 +378,18 @@ $(BUILD)/hmac_ct.o: src/mac/hmac_ct.c $(HEADERS) $(BUILD)/hmac_drbg.o: src/rand/hmac_drbg.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/hmac_drbg.o src/rand/hmac_drbg.c +$(BUILD)/rsa_i15_pkcs1_sign.o: src/rsa/rsa_i15_pkcs1_sign.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pkcs1_sign.o src/rsa/rsa_i15_pkcs1_sign.c + +$(BUILD)/rsa_i15_pkcs1_vrfy.o: src/rsa/rsa_i15_pkcs1_vrfy.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pkcs1_vrfy.o src/rsa/rsa_i15_pkcs1_vrfy.c + +$(BUILD)/rsa_i15_priv.o: src/rsa/rsa_i15_priv.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_priv.o src/rsa/rsa_i15_priv.c + +$(BUILD)/rsa_i15_pub.o: src/rsa/rsa_i15_pub.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pub.o src/rsa/rsa_i15_pub.c + $(BUILD)/rsa_i31_pkcs1_sign.o: src/rsa/rsa_i31_pkcs1_sign.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i31_pkcs1_sign.o src/rsa/rsa_i31_pkcs1_sign.c @@ -373,6 +414,12 @@ $(BUILD)/rsa_i32_priv.o: src/rsa/rsa_i32_priv.c $(HEADERS) $(BUILD)/rsa_i32_pub.o: src/rsa/rsa_i32_pub.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i32_pub.o src/rsa/rsa_i32_pub.c +$(BUILD)/rsa_pkcs1_sig_pad.o: src/rsa/rsa_pkcs1_sig_pad.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_pkcs1_sig_pad.o src/rsa/rsa_pkcs1_sig_pad.c + +$(BUILD)/rsa_pkcs1_sig_unpad.o: src/rsa/rsa_pkcs1_sig_unpad.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_pkcs1_sig_unpad.o src/rsa/rsa_pkcs1_sig_unpad.c + $(BUILD)/rsa_ssl_decrypt.o: src/rsa/rsa_ssl_decrypt.c $(HEADERS) $(CC) $(CFLAGS) -c -o $(BUILD)/rsa_ssl_decrypt.o src/rsa/rsa_ssl_decrypt.c diff --git a/inc/bearssl_ec.h b/inc/bearssl_ec.h index 336c89c..69ad29e 100644 --- a/inc/bearssl_ec.h +++ b/inc/bearssl_ec.h @@ -369,6 +369,15 @@ typedef struct { */ extern const br_ec_impl br_ec_prime_i31; +/** + * \brief EC implementation "i15". + * + * This implementation internally uses generic code for modular integers, + * with a representation as sequences of 15-bit words. It supports secp256r1, + * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521). + */ +extern const br_ec_impl br_ec_prime_i15; + /** * \brief EC implementation "i15" for P-256. * @@ -533,4 +542,70 @@ uint32_t br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl, const void *hash, size_t hash_len, const br_ec_public_key *pk, const void *sig, size_t sig_len); +/** + * \brief ECDSA signature generator, "i15" implementation, "asn1" format. + * + * \see br_ecdsa_sign() + * + * \param impl EC implementation to use. + * \param hf hash function used to process the data. + * \param hash_value signed data (hashed). + * \param sk EC private key. + * \param sig destination buffer. + * \return the signature length (in bytes), or 0 on error. + */ +size_t br_ecdsa_i15_sign_asn1(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig); + +/** + * \brief ECDSA signature generator, "i15" implementation, "raw" format. + * + * \see br_ecdsa_sign() + * + * \param impl EC implementation to use. + * \param hf hash function used to process the data. + * \param hash_value signed data (hashed). + * \param sk EC private key. + * \param sig destination buffer. + * \return the signature length (in bytes), or 0 on error. + */ +size_t br_ecdsa_i15_sign_raw(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig); + +/** + * \brief ECDSA signature verifier, "i15" implementation, "asn1" format. + * + * \see br_ecdsa_vrfy() + * + * \param impl EC implementation to use. + * \param hash signed data (hashed). + * \param hash_len hash value length (in bytes). + * \param pk EC public key. + * \param sig signature. + * \param sig_len signature length (in bytes). + * \return 1 on success, 0 on error. + */ +uint32_t br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, const void *sig, size_t sig_len); + +/** + * \brief ECDSA signature verifier, "i15" implementation, "raw" format. + * + * \see br_ecdsa_vrfy() + * + * \param impl EC implementation to use. + * \param hash signed data (hashed). + * \param hash_len hash value length (in bytes). + * \param pk EC public key. + * \param sig signature. + * \param sig_len signature length (in bytes). + * \return 1 on success, 0 on error. + */ +uint32_t br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, const void *sig, size_t sig_len); + #endif diff --git a/inc/bearssl_rsa.h b/inc/bearssl_rsa.h index f1bdf2b..b9362a3 100644 --- a/inc/bearssl_rsa.h +++ b/inc/bearssl_rsa.h @@ -121,7 +121,7 @@ * * ## Implementations * - * Two RSA implementations are included: + * Three RSA implementations are included: * * - The **i32** implementation internally represents big integers * as arrays of 32-bit integers. It is perfunctory and portable, @@ -132,6 +132,12 @@ * faster than the i32 implementation (the reduced integer size makes * carry propagation easier) for a similar code footprint, but uses * very slightly larger stack buffers (about 4% bigger). + * + * - The **i15** implementation uses 16-bit integers, each containing + * 15 bits worth of integer data. Multiplication results fit on + * 32 bits, so this won't use the "widening" multiplication routine + * on ARM Cortex M0/M0+, for much better performance and constant-time + * execution. */ /** @@ -445,6 +451,70 @@ uint32_t br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid, const unsigned char *hash, size_t hash_len, const br_rsa_private_key *sk, unsigned char *x); +/* + * RSA "i15" engine. Integers are represented as 15-bit integers, so + * the code uses only 32-bit multiplication (no 64-bit result), which + * is vastly faster (and constant-time) on the ARM Cortex M0/M0+. + */ + +/** + * \brief RSA public key engine "i15". + * + * \see br_rsa_public + * + * \param x operand to exponentiate. + * \param xlen length of the operand (in bytes). + * \param pk RSA public key. + * \return 1 on success, 0 on error. + */ +uint32_t br_rsa_i15_public(unsigned char *x, size_t xlen, + const br_rsa_public_key *pk); + +/** + * \brief RSA signature verification engine "i15". + * + * \see br_rsa_pkcs1_vrfy + * + * \param x signature buffer. + * \param xlen signature length (in bytes). + * \param hash_oid encoded hash algorithm OID (or `NULL`). + * \param hash_len expected hash value length (in bytes). + * \param pk RSA public key. + * \param hash_out output buffer for the hash value. + * \return 1 on success, 0 on error. + */ +uint32_t br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out); + +/** + * \brief RSA private key engine "i15". + * + * \see br_rsa_private + * + * \param x operand to exponentiate. + * \param sk RSA private key. + * \return 1 on success, 0 on error. + */ +uint32_t br_rsa_i15_private(unsigned char *x, + const br_rsa_private_key *sk); + +/** + * \brief RSA signature generation engine "i15". + * + * \see br_rsa_pkcs1_sign + * + * \param hash_oid encoded hash algorithm OID (or `NULL`). + * \param hash hash value. + * \param hash_len hash value length (in bytes). + * \param sk RSA private key. + * \param x output buffer for the hash value. + * \return 1 on success, 0 on error. + */ +uint32_t br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x); + /** * \brief RSA decryption helper, for SSL/TLS. * diff --git a/src/ec/ec_prime_i15.c b/src/ec/ec_prime_i15.c new file mode 100644 index 0000000..04bdd5d --- /dev/null +++ b/src/ec/ec_prime_i15.c @@ -0,0 +1,792 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Parameters for supported curves: + * - field modulus p + * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p) + * - b*R mod p (b is the second curve equation parameter) + */ + +static const uint16_t P256_P[] = { + 0x0111, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF, + 0x7FFF, 0x0001 +}; + +static const uint16_t P256_R2[] = { + 0x0111, + 0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF, + 0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF, + 0x4FFF, 0x0000 +}; + +static const uint16_t P256_B[] = { + 0x0111, + 0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993, + 0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16, + 0x0187, 0x0000 +}; + +static const uint16_t P384_P[] = { + 0x0199, + 0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF, + 0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x01FF +}; + +static const uint16_t P384_R2[] = { + 0x0199, + 0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010, + 0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000, + 0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000 +}; + +static const uint16_t P384_B[] = { + 0x0199, + 0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB, + 0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2, + 0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09, + 0x0452, 0x0084 +}; + +static const uint16_t P521_P[] = { + 0x022B, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, + 0x7FFF, 0x7FFF, 0x07FF +}; + +static const uint16_t P521_R2[] = { + 0x022B, + 0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000 +}; + +static const uint16_t P521_B[] = { + 0x022B, + 0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC, + 0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F, + 0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD, + 0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4, + 0x1618, 0x27D7, 0x0465 +}; + +typedef struct { + const uint16_t *p; + const uint16_t *b; + const uint16_t *R2; + uint16_t p0i; + size_t point_len; +} curve_params; + +static inline const curve_params * +id_to_curve(int curve) +{ + static const curve_params pp[] = { + { P256_P, P256_B, P256_R2, 0x0001, 65 }, + { P384_P, P384_B, P384_R2, 0x0001, 97 }, + { P521_P, P521_B, P521_R2, 0x0001, 133 } + }; + + return &pp[curve - BR_EC_secp256r1]; +} + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) + +/* + * Type for a point in Jacobian coordinates: + * -- three values, x, y and z, in Montgomery representation + * -- affine coordinates are X = x / z^2 and Y = y / z^3 + * -- for the point at infinity, z = 0 + */ +typedef struct { + uint16_t c[3][I15_LEN]; +} jacobian; + +/* + * We use a custom interpreter that uses a dozen registers, and + * only six operations: + * MSET(d, a) copy a into d + * MADD(d, a) d = d+a (modular) + * MSUB(d, a) d = d-a (modular) + * MMUL(d, a, b) d = a*b (Montgomery multiplication) + * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers + * MTZ(d) clear return value if d = 0 + * Destination of MMUL (d) must be distinct from operands (a and b). + * There is no such constraint for MSUB and MADD. + * + * Registers include the operand coordinates, and temporaries. + */ +#define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4)) +#define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4)) +#define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4)) +#define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b)) +#define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b)) +#define MTZ(d) (0x5000 + ((d) << 8)) +#define ENDCODE 0 + +/* + * Registers for the input operands. + */ +#define P1x 0 +#define P1y 1 +#define P1z 2 +#define P2x 3 +#define P2y 4 +#define P2z 5 + +/* + * Alternate names for the first input operand. + */ +#define Px 0 +#define Py 1 +#define Pz 2 + +/* + * Temporaries. + */ +#define t1 6 +#define t2 7 +#define t3 8 +#define t4 9 +#define t5 10 +#define t6 11 +#define t7 12 + +/* + * Extra scratch registers available when there is no second operand (e.g. + * for "double" and "affine"). + */ +#define t8 3 +#define t9 4 +#define t10 5 + +/* + * Doubling formulas are: + * + * s = 4*x*y^2 + * m = 3*(x + z^2)*(x - z^2) + * x' = m^2 - 2*s + * y' = m*(s - x') - 8*y^4 + * z' = 2*y*z + * + * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it + * should. This case should not happen anyway, because our curves have + * prime order, and thus do not contain any point of order 2. + * + * If P is infinity (z = 0), then again the formulas yield infinity, + * which is correct. Thus, this code works for all points. + * + * Cost: 8 multiplications + */ +static const uint16_t code_double[] = { + /* + * Compute z^2 (in t1). + */ + MMUL(t1, Pz, Pz), + + /* + * Compute x-z^2 (in t2) and then x+z^2 (in t1). + */ + MSET(t2, Px), + MSUB(t2, t1), + MADD(t1, Px), + + /* + * Compute m = 3*(x+z^2)*(x-z^2) (in t1). + */ + MMUL(t3, t1, t2), + MSET(t1, t3), + MADD(t1, t3), + MADD(t1, t3), + + /* + * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3). + */ + MMUL(t3, Py, Py), + MADD(t3, t3), + MMUL(t2, Px, t3), + MADD(t2, t2), + + /* + * Compute x' = m^2 - 2*s. + */ + MMUL(Px, t1, t1), + MSUB(Px, t2), + MSUB(Px, t2), + + /* + * Compute z' = 2*y*z. + */ + MMUL(t4, Py, Pz), + MSET(Pz, t4), + MADD(Pz, t4), + + /* + * Compute y' = m*(s - x') - 8*y^4. Note that we already have + * 2*y^2 in t3. + */ + MSUB(t2, Px), + MMUL(Py, t1, t2), + MMUL(t4, t3, t3), + MSUB(Py, t4), + MSUB(Py, t4), + + ENDCODE +}; + +/* + * Addtions formulas are: + * + * u1 = x1 * z2^2 + * u2 = x2 * z1^2 + * s1 = y1 * z2^3 + * s2 = y2 * z1^3 + * h = u2 - u1 + * r = s2 - s1 + * x3 = r^2 - h^3 - 2 * u1 * h^2 + * y3 = r * (u1 * h^2 - x3) - s1 * h^3 + * z3 = h * z1 * z2 + * + * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that + * z3 == 0, so the result is correct. + * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is + * not correct. + * h == 0 only if u1 == u2; this happens in two cases: + * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2 + * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity) + * + * Thus, the following situations are not handled correctly: + * -- P1 = 0 and P2 != 0 + * -- P1 != 0 and P2 = 0 + * -- P1 = P2 + * All other cases are properly computed. However, even in "incorrect" + * situations, the three coordinates still are properly formed field + * elements. + * + * The returned flag is cleared if r == 0. This happens in the following + * cases: + * -- Both points are on the same horizontal line (same Y coordinate). + * -- Both points are infinity. + * -- One point is infinity and the other is on line Y = 0. + * The third case cannot happen with our curves (there is no valid point + * on line Y = 0 since that would be a point of order 2). If the two + * source points are non-infinity, then remains only the case where the + * two points are on the same horizontal line. + * + * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and + * P2 != 0: + * -- If the returned value is not the point at infinity, then it was properly + * computed. + * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result + * is indeed the point at infinity. + * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should + * use the 'double' code. + * + * Cost: 16 multiplications + */ +static const uint16_t code_add[] = { + /* + * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3). + */ + MMUL(t3, P2z, P2z), + MMUL(t1, P1x, t3), + MMUL(t4, P2z, t3), + MMUL(t3, P1y, t4), + + /* + * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4). + */ + MMUL(t4, P1z, P1z), + MMUL(t2, P2x, t4), + MMUL(t5, P1z, t4), + MMUL(t4, P2y, t5), + + /* + * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). + */ + MSUB(t2, t1), + MSUB(t4, t3), + + /* + * Report cases where r = 0 through the returned flag. + */ + MTZ(t4), + + /* + * Compute u1*h^2 (in t6) and h^3 (in t5). + */ + MMUL(t7, t2, t2), + MMUL(t6, t1, t7), + MMUL(t5, t7, t2), + + /* + * Compute x3 = r^2 - h^3 - 2*u1*h^2. + * t1 and t7 can be used as scratch registers. + */ + MMUL(P1x, t4, t4), + MSUB(P1x, t5), + MSUB(P1x, t6), + MSUB(P1x, t6), + + /* + * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. + */ + MSUB(t6, P1x), + MMUL(P1y, t4, t6), + MMUL(t1, t5, t3), + MSUB(P1y, t1), + + /* + * Compute z3 = h*z1*z2. + */ + MMUL(t1, P1z, P2z), + MMUL(P1z, t1, t2), + + ENDCODE +}; + +/* + * Check that the point is on the curve. This code snippet assumes the + * following conventions: + * -- Coordinates x and y have been freshly decoded in P1 (but not + * converted to Montgomery coordinates yet). + * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1. + */ +static const uint16_t code_check[] = { + + /* Convert x and y to Montgomery representation. */ + MMUL(t1, P1x, P2x), + MMUL(t2, P1y, P2x), + MSET(P1x, t1), + MSET(P1y, t2), + + /* Compute x^3 in t1. */ + MMUL(t2, P1x, P1x), + MMUL(t1, P1x, t2), + + /* Subtract 3*x from t1. */ + MSUB(t1, P1x), + MSUB(t1, P1x), + MSUB(t1, P1x), + + /* Add b. */ + MADD(t1, P2y), + + /* Compute y^2 in t2. */ + MMUL(t2, P1y, P1y), + + /* Compare y^2 with x^3 - 3*x + b; they must match. */ + MSUB(t1, t2), + MTZ(t1), + + /* Set z to 1 (in Montgomery representation). */ + MMUL(P1z, P2x, P2z), + + ENDCODE +}; + +/* + * Conversion back to affine coordinates. This code snippet assumes that + * the z coordinate of P2 is set to 1 (not in Montgomery representation). + */ +static const uint16_t code_affine[] = { + + /* Save z*R in t1. */ + MSET(t1, P1z), + + /* Compute z^3 in t2. */ + MMUL(t2, P1z, P1z), + MMUL(t3, P1z, t2), + MMUL(t2, t3, P2z), + + /* Invert to (1/z^3) in t2. */ + MINV(t2, t3, t4), + + /* Compute y. */ + MSET(t3, P1y), + MMUL(P1y, t2, t3), + + /* Compute (1/z^2) in t3. */ + MMUL(t3, t2, t1), + + /* Compute x. */ + MSET(t2, P1x), + MMUL(P1x, t2, t3), + + ENDCODE +}; + +static uint32_t +run_code(jacobian *P1, const jacobian *P2, + const curve_params *cc, const uint16_t *code) +{ + uint32_t r; + uint16_t t[13][I15_LEN]; + size_t u; + + r = 1; + + /* + * Copy the two operands in the dedicated registers. + */ + memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t)); + memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t)); + + /* + * Run formulas. + */ + for (u = 0;; u ++) { + unsigned op, d, a, b; + + op = code[u]; + if (op == 0) { + break; + } + d = (op >> 8) & 0x0F; + a = (op >> 4) & 0x0F; + b = op & 0x0F; + op >>= 12; + switch (op) { + uint32_t ctl; + size_t plen; + unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3]; + + case 0: + memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t)); + break; + case 1: + ctl = br_i15_add(t[d], t[a], 1); + ctl |= NOT(br_i15_sub(t[d], cc->p, 0)); + br_i15_sub(t[d], cc->p, ctl); + break; + case 2: + br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1)); + break; + case 3: + br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i); + break; + case 4: + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + br_i15_encode(tp, plen, cc->p); + tp[plen - 1] -= 2; + br_i15_modpow(t[d], tp, plen, + cc->p, cc->p0i, t[a], t[b]); + break; + default: + r &= ~br_i15_iszero(t[d]); + break; + } + } + + /* + * Copy back result. + */ + memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t)); + return r; +} + +static void +set_one(uint16_t *x, const uint16_t *p) +{ + size_t plen; + + plen = (p[0] + 31) >> 4; + memset(x, 0, plen * sizeof *x); + x[0] = p[0]; + x[1] = 0x0001; +} + +static void +point_zero(jacobian *P, const curve_params *cc) +{ + memset(P, 0, sizeof *P); + P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0]; +} + +static inline void +point_double(jacobian *P, const curve_params *cc) +{ + run_code(P, P, cc, code_double); +} + +static inline uint32_t +point_add(jacobian *P1, const jacobian *P2, const curve_params *cc) +{ + return run_code(P1, P2, cc, code_add); +} + +static void +point_mul(jacobian *P, const unsigned char *x, size_t xlen, + const curve_params *cc) +{ + /* + * We do a simple double-and-add ladder with a 2-bit window + * to make only one add every two doublings. We thus first + * precompute 2P and 3P in some local buffers. + * + * We always perform two doublings and one addition; the + * addition is with P, 2P and 3P and is done in a temporary + * array. + * + * The addition code cannot handle cases where one of the + * operands is infinity, which is the case at the start of the + * ladder. We therefore need to maintain a flag that controls + * this situation. + */ + uint32_t qz; + jacobian P2, P3, Q, T, U; + + memcpy(&P2, P, sizeof P2); + point_double(&P2, cc); + memcpy(&P3, P, sizeof P3); + point_add(&P3, &P2, cc); + + point_zero(&Q, cc); + qz = 1; + while (xlen -- > 0) { + int k; + + for (k = 6; k >= 0; k -= 2) { + uint32_t bits; + uint32_t bnz; + + point_double(&Q, cc); + point_double(&Q, cc); + memcpy(&T, P, sizeof T); + memcpy(&U, &Q, sizeof U); + bits = (*x >> k) & (uint32_t)3; + bnz = NEQ(bits, 0); + CCOPY(EQ(bits, 2), &T, &P2, sizeof T); + CCOPY(EQ(bits, 3), &T, &P3, sizeof T); + point_add(&U, &T, cc); + CCOPY(bnz & qz, &Q, &T, sizeof Q); + CCOPY(bnz & ~qz, &Q, &U, sizeof Q); + qz &= ~bnz; + } + x ++; + } + memcpy(P, &Q, sizeof Q); +} + +/* + * Decode point into Jacobian coordinates. This function does not support + * the point at infinity. If the point is invalid then this returns 0, but + * the coordinates are still set to properly formed field elements. + */ +static uint32_t +point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc) +{ + /* + * Points must use uncompressed format: + * -- first byte is 0x04; + * -- coordinates X and Y use unsigned big-endian, with the same + * length as the field modulus. + * + * We don't support hybrid format (uncompressed, but first byte + * has value 0x06 or 0x07, depending on the least significant bit + * of Y) because it is rather useless, and explicitly forbidden + * by PKIX (RFC 5480, section 2.2). + * + * We don't support compressed format either, because it is not + * much used in practice (there are or were patent-related + * concerns about point compression, which explains the lack of + * generalised support). Also, point compression support would + * need a bit more code. + */ + const unsigned char *buf; + size_t plen, zlen; + uint32_t r; + jacobian Q; + + buf = src; + point_zero(P, cc); + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + if (len != 1 + (plen << 1)) { + return 0; + } + r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p); + r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p); + + /* + * Check first byte. + */ + r &= EQ(buf[0], 0x04); + /* obsolete + r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06) + & ~(uint32_t)(buf[0] ^ buf[plen << 1])); + */ + + /* + * Convert coordinates and check that the point is valid. + */ + zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t); + memcpy(Q.c[0], cc->R2, zlen); + memcpy(Q.c[1], cc->b, zlen); + set_one(Q.c[2], cc->p); + r &= ~run_code(P, &Q, cc, code_check); + return r; +} + +/* + * Encode a point. This method assumes that the point is correct and is + * not the point at infinity. Encoded size is always 1+2*plen, where + * plen is the field modulus length, in bytes. + */ +static void +point_encode(void *dst, const jacobian *P, const curve_params *cc) +{ + unsigned char *buf; + size_t plen; + jacobian Q, T; + + buf = dst; + plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3; + buf[0] = 0x04; + memcpy(&Q, P, sizeof *P); + set_one(T.c[2], cc->p); + run_code(&Q, &T, cc, code_affine); + br_i15_encode(buf + 1, plen, Q.c[0]); + br_i15_encode(buf + 1 + plen, plen, Q.c[1]); +} + +static const br_ec_curve_def * +id_to_curve_def(int curve) +{ + switch (curve) { + case BR_EC_secp256r1: + return &br_secp256r1; + case BR_EC_secp384r1: + return &br_secp384r1; + case BR_EC_secp521r1: + return &br_secp521r1; + } + return NULL; +} + +static const unsigned char * +api_generator(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = cd->generator_len; + return cd->generator; +} + +static const unsigned char * +api_order(int curve, size_t *len) +{ + const br_ec_curve_def *cd; + + cd = id_to_curve_def(curve); + *len = cd->order_len; + return cd->order; +} + +static uint32_t +api_mul(unsigned char *G, size_t Glen, + const unsigned char *x, size_t xlen, int curve) +{ + uint32_t r; + const curve_params *cc; + jacobian P; + + cc = id_to_curve(curve); + r = point_decode(&P, G, Glen, cc); + point_mul(&P, x, xlen, cc); + if (Glen == cc->point_len) { + point_encode(G, &P, cc); + } + return r; +} + +static uint32_t +api_muladd(unsigned char *A, const unsigned char *B, size_t len, + const unsigned char *x, size_t xlen, + const unsigned char *y, size_t ylen, int curve) +{ + uint32_t r, t, z; + const curve_params *cc; + jacobian P, Q; + + /* + * TODO: see about merging the two ladders. Right now, we do + * two independant point multiplications, which is a bit + * wasteful of CPU resources (but yields short code). + */ + + cc = id_to_curve(curve); + r = point_decode(&P, A, len, cc); + r &= point_decode(&Q, B, len, cc); + point_mul(&P, x, xlen, cc); + point_mul(&Q, y, ylen, cc); + + /* + * We want to compute P+Q. Since the base points A and B are distinct + * from infinity, and the multipliers are non-zero and lower than the + * curve order, then we know that P and Q are non-infinity. This + * leaves two special situations to test for: + * -- If P = Q then we must use point_double(). + * -- If P+Q = 0 then we must report an error. + */ + t = point_add(&P, &Q, cc); + point_double(&Q, cc); + z = br_i15_iszero(P.c[2]); + + /* + * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we + * have the following: + * + * z = 0, t = 0 return P (normal addition) + * z = 0, t = 1 return P (normal addition) + * z = 1, t = 0 return Q (a 'double' case) + * z = 1, t = 1 report an error (P+Q = 0) + */ + CCOPY(z & ~t, &P, &Q, sizeof Q); + point_encode(A, &P, cc); + r &= ~(z & t); + + return r; +} + +/* see bearssl_ec.h */ +const br_ec_impl br_ec_prime_i15 = { + (uint32_t)0x03800000, + &api_generator, + &api_order, + &api_mul, + &api_muladd +}; diff --git a/src/ec/ec_prime_i31.c b/src/ec/ec_prime_i31.c index 440641e..30c7ef3 100644 --- a/src/ec/ec_prime_i31.c +++ b/src/ec/ec_prime_i31.c @@ -135,7 +135,7 @@ typedef struct { /* * We use a custom interpreter that uses a dozen registers, and - * only four operations: + * only six operations: * MSET(d, a) copy a into d * MADD(d, a) d = d+a (modular) * MSUB(d, a) d = d-a (modular) diff --git a/src/ec/ecdsa_i15_bits.c b/src/ec/ecdsa_i15_bits.c new file mode 100644 index 0000000..402d14a --- /dev/null +++ b/src/ec/ecdsa_i15_bits.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +void +br_ecdsa_i15_bits2int(uint16_t *x, + const void *src, size_t len, uint32_t ebitlen) +{ + uint32_t bitlen, hbitlen; + int sc; + + bitlen = ebitlen - (ebitlen >> 4); + hbitlen = (uint32_t)len << 3; + if (hbitlen > bitlen) { + len = (bitlen + 7) >> 3; + sc = (int)((hbitlen - bitlen) & 7); + } else { + sc = 0; + } + br_i15_zero(x, ebitlen); + br_i15_decode(x, src, len); + br_i15_rshift(x, sc); + x[0] = ebitlen; +} diff --git a/src/ec/ecdsa_i15_sign_asn1.c b/src/ec/ecdsa_i15_sign_asn1.c new file mode 100644 index 0000000..ab4a283 --- /dev/null +++ b/src/ec/ecdsa_i15_sign_asn1.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i15_sign_asn1(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + unsigned char rsig[(ORDER_LEN << 1) + 12]; + size_t sig_len; + + sig_len = br_ecdsa_i15_sign_raw(impl, hf, hash_value, sk, rsig); + if (sig_len == 0) { + return 0; + } + sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len); + memcpy(sig, rsig, sig_len); + return sig_len; +} diff --git a/src/ec/ecdsa_i15_sign_raw.c b/src/ec/ecdsa_i15_sign_raw.c new file mode 100644 index 0000000..87b2f33 --- /dev/null +++ b/src/ec/ecdsa_i15_sign_raw.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) +#define ORDER_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +size_t +br_ecdsa_i15_sign_raw(const br_ec_impl *impl, + const br_hash_class *hf, const void *hash_value, + const br_ec_private_key *sk, void *sig) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + * We also rely on the last byte of the curve order to be distinct + * from 0 and 1. + */ + const br_ec_curve_def *cd; + uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], x[I15_LEN]; + uint16_t m[I15_LEN], k[I15_LEN], t1[I15_LEN], t2[I15_LEN]; + unsigned char tt[ORDER_LEN << 1]; + unsigned char eU[POINT_LEN]; + size_t hash_len, nlen, ulen; + uint16_t n0i; + uint32_t ctl; + br_hmac_drbg_context drbg; + + /* + * If the curve is not supported, then exit with an error. + */ + if (((impl->supported_curves >> sk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (sk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Get modulus. + */ + nlen = cd->order_len; + br_i15_decode(n, cd->order, nlen); + n0i = br_i15_ninv15(n[1]); + + /* + * Get private key as an i15 integer. This also checks that the + * private key is well-defined (not zero, and less than the + * curve order). + */ + if (!br_i15_decode_mod(x, sk->x, sk->xlen, n)) { + return 0; + } + if (br_i15_iszero(x)) { + return 0; + } + + /* + * Get hash length. + */ + hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK; + + /* + * Truncate and reduce the hash value modulo the curve order. + */ + br_ecdsa_i15_bits2int(m, hash_value, hash_len, n[0]); + br_i15_sub(m, n, br_i15_sub(m, n, 0) ^ 1); + + /* + * RFC 6979 generation of the "k" value. + * + * The process uses HMAC_DRBG (with the hash function used to + * process the message that is to be signed). The seed is the + * concatenation of the encodings of the private key and + * the hash value (after truncation and modular reduction). + */ + br_i15_encode(tt, nlen, x); + br_i15_encode(tt + nlen, nlen, m); + br_hmac_drbg_init(&drbg, hf, tt, nlen << 1); + for (;;) { + br_hmac_drbg_generate(&drbg, tt, nlen); + br_ecdsa_i15_bits2int(k, tt, nlen, n[0]); + if (br_i15_iszero(k)) { + continue; + } + if (br_i15_sub(k, n, 0)) { + break; + } + } + + /* + * Compute k*G and extract the X coordinate, then reduce it + * modulo the curve order. Since we support only curves with + * prime order, that reduction is only a matter of computing + * a subtraction. + */ + ulen = cd->generator_len; + memcpy(eU, cd->generator, ulen); + br_i15_encode(tt, nlen, k); + if (!impl->mul(eU, ulen, tt, nlen, sk->curve)) { + /* + * Point multiplication may fail here only if the + * EC implementation does not support the curve, or the + * private key is incorrect (x is a multiple of the curve + * order). + */ + return 0; + } + br_i15_zero(r, n[0]); + br_i15_decode(r, &eU[1], ulen >> 1); + r[0] = n[0]; + br_i15_sub(r, n, br_i15_sub(r, n, 0) ^ 1); + + /* + * Compute 1/k in double-Montgomery representation. We do so by + * first converting _from_ Montgomery representation (twice), + * then using a modular exponentiation. + */ + br_i15_from_monty(k, n, n0i); + br_i15_from_monty(k, n, n0i); + memcpy(tt, cd->order, nlen); + tt[nlen - 1] -= 2; + br_i15_modpow(k, tt, nlen, n, n0i, t1, t2); + + /* + * Compute s = (m+xr)/k (mod n). + * The k[] array contains R^2/k (double-Montgomery representation); + * we thus can use direct Montgomery multiplications and conversions + * from Montgomery, avoiding any call to br_i15_to_monty() (which + * is slower). + */ + br_i15_from_monty(m, n, n0i); + br_i15_montymul(t1, x, r, n, n0i); + ctl = br_i15_add(t1, m, 1); + ctl |= br_i15_sub(t1, n, 0) ^ 1; + br_i15_sub(t1, n, ctl); + br_i15_montymul(s, t1, k, n, n0i); + + /* + * Encode r and s in the signature. + */ + br_i15_encode(sig, nlen, r); + br_i15_encode((unsigned char *)sig + nlen, nlen, s); + return nlen << 1; +} diff --git a/src/ec/ecdsa_i15_vrfy_asn1.c b/src/ec/ecdsa_i15_vrfy_asn1.c new file mode 100644 index 0000000..f4bef99 --- /dev/null +++ b/src/ec/ecdsa_i15_vrfy_asn1.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define FIELD_LEN ((BR_MAX_EC_SIZE + 7) >> 3) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * We use a double-sized buffer because a malformed ASN.1 signature + * may trigger a size expansion when converting to "raw" format. + */ + unsigned char rsig[(FIELD_LEN << 2) + 24]; + + if (sig_len > ((sizeof rsig) >> 1)) { + return 0; + } + memcpy(rsig, sig, sig_len); + sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len); + return br_ecdsa_i15_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len); +} diff --git a/src/ec/ecdsa_i15_vrfy_raw.c b/src/ec/ecdsa_i15_vrfy_raw.c new file mode 100644 index 0000000..5a16680 --- /dev/null +++ b/src/ec/ecdsa_i15_vrfy_raw.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15) +#define POINT_LEN (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1)) + +/* see bearssl_ec.h */ +uint32_t +br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl, + const void *hash, size_t hash_len, + const br_ec_public_key *pk, + const void *sig, size_t sig_len) +{ + /* + * IMPORTANT: this code is fit only for curves with a prime + * order. This is needed so that modular reduction of the X + * coordinate of a point can be done with a simple subtraction. + */ + const br_ec_curve_def *cd; + uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], t1[I15_LEN], t2[I15_LEN]; + unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3]; + unsigned char eU[POINT_LEN]; + size_t nlen, rlen, ulen; + uint16_t n0i; + uint32_t res; + + /* + * If the curve is not supported, then report an error. + */ + if (((impl->supported_curves >> pk->curve) & 1) == 0) { + return 0; + } + + /* + * Get the curve parameters (generator and order). + */ + switch (pk->curve) { + case BR_EC_secp256r1: + cd = &br_secp256r1; + break; + case BR_EC_secp384r1: + cd = &br_secp384r1; + break; + case BR_EC_secp521r1: + cd = &br_secp521r1; + break; + default: + return 0; + } + + /* + * Signature length must be even. + */ + if (sig_len & 1) { + return 0; + } + rlen = sig_len >> 1; + + /* + * Public key point must have the proper size for this curve. + */ + if (pk->qlen != cd->generator_len) { + return 0; + } + + /* + * Get modulus; then decode the r and s values. They must be + * lower than the modulus, and s must not be null. + */ + nlen = cd->order_len; + br_i15_decode(n, cd->order, nlen); + n0i = br_i15_ninv15(n[1]); + if (!br_i15_decode_mod(r, sig, rlen, n)) { + return 0; + } + if (!br_i15_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) { + return 0; + } + if (br_i15_iszero(s)) { + return 0; + } + + /* + * Invert s. We do that with a modular exponentiation; we use + * the fact that for all the curves we support, the least + * significant byte is not 0 or 1, so we can subtract 2 without + * any carry to process. + * We also want 1/s in Montgomery representation, which can be + * done by converting _from_ Montgomery representation before + * the inversion (because (1/s)*R = 1/(s/R)). + */ + br_i15_from_monty(s, n, n0i); + memcpy(tx, cd->order, nlen); + tx[nlen - 1] -= 2; + br_i15_modpow(s, tx, nlen, n, n0i, t1, t2); + + /* + * Truncate the hash to the modulus length (in bits) and reduce + * it modulo the curve order. The modular reduction can be done + * with a subtraction since the truncation already reduced the + * value to the modulus bit length. + */ + br_ecdsa_i15_bits2int(t1, hash, hash_len, n[0]); + br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1); + + /* + * Multiply the (truncated, reduced) hash value with 1/s, result in + * t2, encoded in ty. + */ + br_i15_montymul(t2, t1, s, n, n0i); + br_i15_encode(ty, nlen, t2); + + /* + * Multiply r with 1/s, result in t1, encoded in tx. + */ + br_i15_montymul(t1, r, s, n, n0i); + br_i15_encode(tx, nlen, t1); + + /* + * Compute the point x*Q + y*G. + */ + ulen = cd->generator_len; + memcpy(eU, pk->q, ulen); + res = impl->muladd(eU, cd->generator, ulen, + tx, nlen, ty, nlen, cd->curve); + + /* + * Get the X coordinate, reduce modulo the curve order, and + * compare with the 'r' value. + * + * The modular reduction can be done with subtractions because + * we work with curves of prime order, so the curve order is + * close to the field order (Hasse's theorem). + */ + br_i15_zero(t1, n[0]); + br_i15_decode(t1, &eU[1], ulen >> 1); + t1[0] = n[0]; + br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1); + res &= ~br_i15_sub(t1, r, 1); + res &= br_i15_iszero(t1); + return res; +} diff --git a/src/inner.h b/src/inner.h index da64f37..8417b24 100644 --- a/src/inner.h +++ b/src/inner.h @@ -532,11 +532,30 @@ MAX(uint32_t x, uint32_t y) * (old) platforms where the default MUL31 is not. Unfortunately, it is * also substantially slower, and yields larger code, on more modern * platforms, which is why it is deactivated by default. + * + * MUL31_lo() must do some extra work because on some platforms, the + * _signed_ multiplication may return early if the top bits are 1. + * Simply truncating (casting) the output of MUL31() would not be + * sufficient, because the compiler may notice that we keep only the low + * word, and then replace automatically the unsigned multiplication with + * a signed multiplication opcode. */ #define MUL31(x, y) ((uint64_t)((x) | (uint32_t)0x80000000) \ * (uint64_t)((y) | (uint32_t)0x80000000) \ - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \ - ((uint64_t)1 << 62)) +static inline uint32_t +MUL31_lo(uint32_t x, uint32_t y) +{ + uint32_t xl, xh; + uint32_t yl, yh; + + xl = (x & 0xFFFF) | (uint32_t)0x80000000; + xh = (x >> 16) | (uint32_t)0x80000000; + yl = (y & 0xFFFF) | (uint32_t)0x80000000; + yh = (y >> 16) | (uint32_t)0x80000000; + return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF; +} #else @@ -544,8 +563,10 @@ MAX(uint32_t x, uint32_t y) * Multiply two 31-bit integers, with a 62-bit result. This default * implementation assumes that the basic multiplication operator * yields constant-time code. + * The MUL31_lo() macro returns only the low 31 bits of the product. */ -#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y)) +#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y)) +#define MUL31_lo(x, y) (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF) #endif @@ -558,7 +579,7 @@ MAX(uint32_t x, uint32_t y) */ #if BR_CT_MUL15 #define MUL15(x, y) (((uint32_t)(x) | (uint32_t)0x80000000) \ - * ((uint32_t)(x) | (uint32_t)0x80000000) \ + * ((uint32_t)(y) | (uint32_t)0x80000000) \ & (uint32_t)0x3FFFFFFF) #else #define MUL15(x, y) ((uint32_t)(x) * (uint32_t)(y)) @@ -1046,6 +1067,53 @@ void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b); /* ==================================================================== */ +static inline void +br_i15_zero(uint16_t *x, uint16_t bit_len) +{ + *x ++ = bit_len; + memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x); +} + +uint32_t br_i15_iszero(const uint16_t *x); + +uint16_t br_i15_ninv15(uint16_t x); + +uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl); + +uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl); + +void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m); + +void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i); + +void br_i15_to_monty(uint16_t *x, const uint16_t *m); + +void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen, + const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2); + +void br_i15_encode(void *dst, size_t len, const uint16_t *x); + +uint32_t br_i15_decode_mod(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_rshift(uint16_t *x, int count); + +uint32_t br_i15_bit_length(uint16_t *x, size_t xlen); + +void br_i15_decode(uint16_t *x, const void *src, size_t len); + +void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i); + +void br_i15_decode_reduce(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m); + +void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b); + +/* ==================================================================== */ + static inline size_t br_digest_size(const br_hash_class *digest_class) { @@ -1343,6 +1411,29 @@ unsigned br_aes_ct64_keysched(uint64_t *comp_skey, void br_aes_ct64_skey_expand(uint64_t *skey, unsigned num_rounds, const uint64_t *comp_skey); +/* ==================================================================== */ +/* + * RSA. + */ + +/* + * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is + * the encoded hash function OID, or NULL. + */ +uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + uint32_t n_bitlen, unsigned char *x); + +/* + * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded + * hash function OID, or NULL. The provided 'sig' value is _after_ the + * modular exponentiation, i.e. it should be the padded hash. On + * success, the hashed message is extracted. + */ +uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len, + const unsigned char *hash_oid, size_t hash_len, + unsigned char *hash_out); + /* ==================================================================== */ /* * Elliptic curves. @@ -1364,6 +1455,8 @@ extern const br_ec_curve_def br_secp256r1; extern const br_ec_curve_def br_secp384r1; extern const br_ec_curve_def br_secp521r1; +#if 0 +/* obsolete */ /* * Type for the parameters for a "prime curve": * coordinates are in GF(p), with p prime @@ -1383,6 +1476,7 @@ extern const br_ec_prime_i31_curve br_ec_prime_i31_secp384r1; extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1; #define BR_EC_I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) +#endif /* * Decode some bytes as an i31 integer, with truncation (corresponding @@ -1394,6 +1488,16 @@ extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1; void br_ecdsa_i31_bits2int(uint32_t *x, const void *src, size_t len, uint32_t ebitlen); +/* + * Decode some bytes as an i15 integer, with truncation (corresponding + * to the 'bits2int' operation in RFC 6979). The target ENCODED bit + * length is provided as last parameter. The resulting value will have + * this declared bit length, and consists the big-endian unsigned decoding + * of exactly that many bits in the source (capped at the source length). + */ +void br_ecdsa_i15_bits2int(uint16_t *x, + const void *src, size_t len, uint32_t ebitlen); + /* ==================================================================== */ /* * SSL/TLS support functions. diff --git a/src/int/i15_core.c b/src/int/i15_core.c new file mode 100644 index 0000000..5ae3b31 --- /dev/null +++ b/src/int/i15_core.c @@ -0,0 +1,479 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * This file contains the core "big integer" functions for the i15 + * implementation, that represents integers as sequences of 15-bit + * words. + */ + +/* see inner.h */ +uint32_t +br_i15_iszero(const uint16_t *x) +{ + uint32_t z; + size_t u; + + z = 0; + for (u = (x[0] + 15) >> 4; u > 0; u --) { + z |= x[u]; + } + return ~(z | -z) >> 31; +} + +/* see inner.h */ +uint16_t +br_i15_ninv15(uint16_t x) +{ + uint32_t y; + + y = 2 - x; + y = MUL15(y, 2 - MUL15(x, y)); + y = MUL15(y, 2 - MUL15(x, y)); + y = MUL15(y, 2 - MUL15(x, y)); + return MUX(x & 1, -y, 0) & 0x7FFF; +} + +/* see inner.h */ +uint32_t +br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl) +{ + uint32_t cc; + size_t u, m; + + cc = 0; + m = (a[0] + 31) >> 4; + for (u = 1; u < m; u ++) { + uint32_t aw, bw, naw; + + aw = a[u]; + bw = b[u]; + naw = aw + bw + cc; + cc = naw >> 15; + a[u] = MUX(ctl, naw & 0x7FFF, aw); + } + return cc; +} + +/* see inner.h */ +uint32_t +br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl) +{ + uint32_t cc; + size_t u, m; + + cc = 0; + m = (a[0] + 31) >> 4; + for (u = 1; u < m; u ++) { + uint32_t aw, bw, naw; + + aw = a[u]; + bw = b[u]; + naw = aw - bw - cc; + cc = naw >> 31; + a[u] = MUX(ctl, naw & 0x7FFF, aw); + } + return cc; +} + +/* + * Constant-time division. The divisor must not be larger than 16 bits, + * and the quotient must fit on 17 bits. + */ +static uint32_t +divrem16(uint32_t x, uint32_t d, uint32_t *r) +{ + int i; + uint32_t q; + + q = 0; + d <<= 16; + for (i = 16; i >= 0; i --) { + uint32_t ctl; + + ctl = LE(d, x); + q |= ctl << i; + x -= (-ctl) & d; + d >>= 1; + } + if (r != NULL) { + *r = x; + } + return q; +} + +/* see inner.h */ +void +br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m) +{ + /* + * Constant-time: we accept to leak the exact bit length of the + * modulus m. + */ + unsigned m_bitlen, mblr; + size_t u, mlen; + uint32_t hi, a0, a, b, q; + uint32_t cc, tb, over, under; + + /* + * Simple case: the modulus fits on one word. + */ + m_bitlen = m[0]; + if (m_bitlen == 0) { + return; + } + if (m_bitlen <= 15) { + uint32_t rem; + + divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem); + x[1] = rem; + return; + } + mlen = (m_bitlen + 15) >> 4; + mblr = m_bitlen & 15; + + /* + * Principle: we estimate the quotient (x*2^15+z)/m by + * doing a 30/15 division with the high words. + * + * Let: + * w = 2^15 + * a = (w*a0 + a1) * w^N + a2 + * b = b0 * w^N + b2 + * such that: + * 0 <= a0 < w + * 0 <= a1 < w + * 0 <= a2 < w^N + * w/2 <= b0 < w + * 0 <= b2 < w^N + * a < w*b + * I.e. the two top words of a are a0:a1, the top word of b is + * b0, we ensured that b0 is "full" (high bit set), and a is + * such that the quotient q = a/b fits on one word (0 <= q < w). + * + * If a = b*q + r (with 0 <= r < q), then we can estimate q by + * using a division on the top words: + * a0*w + a1 = b0*u + v (with 0 <= v < b0) + * Then the following holds: + * 0 <= u <= w + * u-2 <= q <= u + */ + hi = x[mlen]; + if (mblr == 0) { + a0 = x[mlen]; + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); + x[1] = z; + a = (a0 << 15) + x[mlen]; + b = m[mlen]; + } else { + a0 = (x[mlen] << (15 - mblr)) | (x[mlen - 1] >> mblr); + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); + x[1] = z; + a = (a0 << 15) | (((x[mlen] << (15 - mblr)) + | (x[mlen - 1] >> mblr)) & 0x7FFF); + b = (m[mlen] << (15 - mblr)) | (m[mlen - 1] >> mblr); + } + q = divrem16(a, b, NULL); + + /* + * We computed an estimate for q, but the real one may be q, + * q-1 or q-2; moreover, the division may have returned a value + * 8000 or even 8001 if the two high words were identical, and + * we want to avoid values beyond 7FFF. We thus adjust q so + * that the "true" multiplier will be q+1, q or q-1, and q is + * in the 0000..7FFF range. + */ + q = MUX(EQ(b, a0), 0x7FFF, q - 1 + ((q - 1) >> 31)); + + /* + * We subtract q*m from x (x has an extra high word of value 'hi'). + * Since q may be off by 1 (in either direction), we may have to + * add or subtract m afterwards. + * + * The 'tb' flag will be true (1) at the end of the loop if the + * result is greater than or equal to the modulus (not counting + * 'hi' or the carry). + */ + cc = 0; + tb = 1; + for (u = 1; u <= mlen; u ++) { + uint32_t mw, zl, xw, nxw; + + mw = m[u]; + zl = MUL15(mw, q) + cc; + cc = zl >> 15; + zl &= 0x7FFF; + xw = x[u]; + nxw = xw - zl; + cc += nxw >> 31; + nxw &= 0x7FFF; + x[u] = nxw; + tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); + } + + /* + * If we underestimated q, then either cc < hi (one extra bit + * beyond the top array word), or cc == hi and tb is true (no + * extra bit, but the result is not lower than the modulus). + * + * If we overestimated q, then cc > hi. + */ + over = GT(cc, hi); + under = ~over & (tb | LT(cc, hi)); + br_i15_add(x, m, over); + br_i15_sub(x, m, under); +} + +/* see inner.h */ +void +br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i) +{ + size_t len, len4, u, v; + uint32_t dh; + + len = (m[0] + 15) >> 4; + len4 = len & ~(size_t)3; + br_i15_zero(d, m[0]); + dh = 0; + for (u = 0; u < len; u ++) { + uint32_t f, xu, r, zh; + + xu = x[u + 1]; + f = MUL15(d[1] + MUL15(x[u + 1], y[1]), m0i) & 0x7FFF; + + r = 0; + for (v = 0; v < len4; v += 4) { + uint32_t z; + + z = d[v + 1] + MUL15(xu, y[v + 1]) + + MUL15(f, m[v + 1]) + r; + r = z >> 15; + d[v + 0] = z & 0x7FFF; + z = d[v + 2] + MUL15(xu, y[v + 2]) + + MUL15(f, m[v + 2]) + r; + r = z >> 15; + d[v + 1] = z & 0x7FFF; + z = d[v + 3] + MUL15(xu, y[v + 3]) + + MUL15(f, m[v + 3]) + r; + r = z >> 15; + d[v + 2] = z & 0x7FFF; + z = d[v + 4] + MUL15(xu, y[v + 4]) + + MUL15(f, m[v + 4]) + r; + r = z >> 15; + d[v + 3] = z & 0x7FFF; + } + for (; v < len; v ++) { + uint32_t z; + + z = d[v + 1] + MUL15(xu, y[v + 1]) + + MUL15(f, m[v + 1]) + r; + r = z >> 15; + d[v + 0] = z & 0x7FFF; + } + + zh = dh + r; + d[len] = zh & 0x7FFF; + dh = zh >> 31; + } + + /* + * Restore the bit length (it was overwritten in the loop above). + */ + d[0] = m[0]; + + /* + * d[] may be greater than m[], but it is still lower than twice + * the modulus. + */ + br_i15_sub(d, m, NEQ(dh, 0) | NOT(br_i15_sub(d, m, 0))); +} + +/* see inner.h */ +void +br_i15_to_monty(uint16_t *x, const uint16_t *m) +{ + unsigned k; + + for (k = (m[0] + 15) >> 4; k > 0; k --) { + br_i15_muladd_small(x, 0, m); + } +} + +/* see inner.h */ +void +br_i15_modpow(uint16_t *x, + const unsigned char *e, size_t elen, + const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2) +{ + size_t mlen; + unsigned k; + + mlen = ((m[0] + 31) >> 4) * sizeof m[0]; + memcpy(t1, x, mlen); + br_i15_to_monty(t1, m); + br_i15_zero(x, m[0]); + x[1] = 1; + for (k = 0; k < ((unsigned)elen << 3); k ++) { + uint32_t ctl; + + ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; + br_i15_montymul(t2, x, t1, m, m0i); + CCOPY(ctl, x, t2, mlen); + br_i15_montymul(t2, t1, t1, m, m0i); + memcpy(t1, t2, mlen); + } +} + +/* see inner.h */ +void +br_i15_encode(void *dst, size_t len, const uint16_t *x) +{ + unsigned char *buf; + size_t u, xlen; + uint32_t acc; + int acc_len; + + xlen = (x[0] + 15) >> 4; + if (xlen == 0) { + memset(dst, 0, len); + return; + } + u = 1; + acc = 0; + acc_len = 0; + buf = dst; + while (len -- > 0) { + if (acc_len < 8) { + if (u <= xlen) { + acc += (uint32_t)x[u ++] << acc_len; + } + acc_len += 15; + } + buf[len] = (unsigned char)acc; + acc >>= 8; + acc_len -= 8; + } +} + +/* see inner.h */ +uint32_t +br_i15_decode_mod(uint16_t *x, const void *src, size_t len, const uint16_t *m) +{ + /* + * Two-pass algorithm: in the first pass, we determine whether the + * value fits; in the second pass, we do the actual write. + * + * During the first pass, 'r' contains the comparison result so + * far: + * 0x00000000 value is equal to the modulus + * 0x00000001 value is greater than the modulus + * 0xFFFFFFFF value is lower than the modulus + * + * Since we iterate starting with the least significant bytes (at + * the end of src[]), each new comparison overrides the previous + * except when the comparison yields 0 (equal). + * + * During the second pass, 'r' is either 0xFFFFFFFF (value fits) + * or 0x00000000 (value does not fit). + * + * We must iterate over all bytes of the source, _and_ possibly + * some extra virutal bytes (with value 0) so as to cover the + * complete modulus as well. We also add 4 such extra bytes beyond + * the modulus length because it then guarantees that no accumulated + * partial word remains to be processed. + */ + const unsigned char *buf; + size_t mlen, tlen; + int pass; + uint32_t r; + + buf = src; + mlen = (m[0] + 15) >> 4; + tlen = (mlen << 1); + if (tlen < len) { + tlen = len; + } + tlen += 4; + r = 0; + for (pass = 0; pass < 2; pass ++) { + size_t u, v; + uint32_t acc; + int acc_len; + + v = 1; + acc = 0; + acc_len = 0; + for (u = 0; u < tlen; u ++) { + uint32_t b; + + if (u < len) { + b = buf[len - 1 - u]; + } else { + b = 0; + } + acc |= (b << acc_len); + acc_len += 8; + if (acc_len >= 15) { + uint32_t xw; + + xw = acc & (uint32_t)0x7FFF; + acc_len -= 15; + acc = b >> (8 - acc_len); + if (v <= mlen) { + if (pass) { + x[v] = r & xw; + } else { + uint32_t cc; + + cc = (uint32_t)CMP(xw, m[v]); + r = MUX(EQ(cc, 0), r, cc); + } + } else { + if (!pass) { + r = MUX(EQ(xw, 0), r, 1); + } + } + v ++; + } + } + + /* + * When we reach this point at the end of the first pass: + * r is either 0, 1 or -1; we want to set r to 0 if it + * is equal to 0 or 1, and leave it to -1 otherwise. + * + * When we reach this point at the end of the second pass: + * r is either 0 or -1; we want to leave that value + * untouched. This is a subcase of the previous. + */ + r >>= 1; + r |= (r << 1); + } + + x[0] = m[0]; + return r & (uint32_t)1; +} diff --git a/src/int/i15_ext1.c b/src/int/i15_ext1.c new file mode 100644 index 0000000..a99ac1a --- /dev/null +++ b/src/int/i15_ext1.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * This file contains some additional functions for "i15" big integers. + * These functions are needed to support ECDSA. + */ + +/* see inner.h */ +void +br_i15_rshift(uint16_t *x, int count) +{ + size_t u, len; + unsigned r; + + len = (x[0] + 15) >> 4; + if (len == 0) { + return; + } + r = x[1] >> count; + for (u = 2; u <= len; u ++) { + unsigned w; + + w = x[u]; + x[u - 1] = ((w << (15 - count)) | r) & 0x7FFF; + r = w >> count; + } + x[len] = r; +} + +/* see inner.h */ +uint32_t +br_i15_bit_length(uint16_t *x, size_t xlen) +{ + uint32_t tw, twk; + + tw = 0; + twk = 0; + while (xlen -- > 0) { + uint32_t w, c; + + c = EQ(tw, 0); + w = x[xlen]; + tw = MUX(c, w, tw); + twk = MUX(c, (uint32_t)xlen, twk); + } + return (twk << 4) + BIT_LENGTH(tw); +} + +/* see inner.h */ +void +br_i15_decode(uint16_t *x, const void *src, size_t len) +{ + const unsigned char *buf; + size_t v; + uint32_t acc; + int acc_len; + + buf = src; + v = 1; + acc = 0; + acc_len = 0; + while (len -- > 0) { + uint32_t b; + + b = buf[len]; + acc |= (b << acc_len); + acc_len += 8; + if (acc_len >= 15) { + x[v ++] = acc & 0x7FFF; + acc_len -= 15; + acc >>= 15; + } + } + if (acc_len != 0) { + x[v ++] = acc; + } + x[0] = br_i15_bit_length(x + 1, v - 1); +} + +/* see inner.h */ +void +br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i) +{ + size_t len, u, v; + + len = (m[0] + 15) >> 4; + for (u = 0; u < len; u ++) { + uint32_t f, cc; + + f = MUL15(x[1], m0i) & 0x7FFF; + cc = 0; + for (v = 0; v < len; v ++) { + uint32_t z; + + z = (uint32_t)x[v + 1] + MUL15(f, m[v + 1]) + cc; + cc = z >> 15; + if (v != 0) { + x[v] = z & 0x7FFF; + } + } + x[len] = cc; + } + + /* + * We may have to do an extra subtraction, but only if the + * value in x[] is indeed greater than or equal to that of m[], + * which is why we must do two calls (first call computes the + * carry, second call performs the subtraction only if the carry + * is 0). + */ + br_i15_sub(x, m, NOT(br_i15_sub(x, m, 0))); +} diff --git a/src/int/i15_ext2.c b/src/int/i15_ext2.c new file mode 100644 index 0000000..84fc2d6 --- /dev/null +++ b/src/int/i15_ext2.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * This file contains some additional functions for "i15" big integers. + * These functions are needed to support RSA. + */ + +/* see inner.h */ +void +br_i15_decode_reduce(uint16_t *x, + const void *src, size_t len, const uint16_t *m) +{ + uint32_t m_ebitlen, m_rbitlen; + size_t mblen, k; + const unsigned char *buf; + uint32_t acc; + int acc_len; + + /* + * Get the encoded bit length. + */ + m_ebitlen = m[0]; + + /* + * Special case for an invalid (null) modulus. + */ + if (m_ebitlen == 0) { + x[0] = 0; + return; + } + + /* + * Clear the destination. + */ + br_i15_zero(x, m_ebitlen); + + /* + * First decode directly as many bytes as possible. This requires + * computing the actual bit length. + */ + m_rbitlen = m_ebitlen >> 4; + m_rbitlen = (m_ebitlen & 15) + (m_rbitlen << 4) - m_rbitlen; + mblen = (m_rbitlen + 7) >> 3; + k = mblen - 1; + if (k >= len) { + br_i15_decode(x, src, len); + x[0] = m_ebitlen; + return; + } + buf = src; + br_i15_decode(x, buf, k); + x[0] = m_ebitlen; + + /* + * Input remaining bytes, using 15-bit words. + */ + acc = 0; + acc_len = 0; + while (k < len) { + uint32_t v; + + v = buf[k ++]; + acc = (acc << 8) | v; + acc_len += 8; + if (acc_len >= 15) { + br_i15_muladd_small(x, acc >> (acc_len - 15), m); + acc_len -= 15; + acc &= ~((uint32_t)-1 << acc_len); + } + } + + /* + * We may have some bits accumulated. We then perform a shift to + * be able to inject these bits as a full 15-bit word. + */ + if (acc_len != 0) { + acc = (acc | (x[1] << acc_len)) & 0x7FFF; + br_i15_rshift(x, 15 - acc_len); + br_i15_muladd_small(x, acc, m); + } +} + +/* see inner.h */ +void +br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m) +{ + uint32_t m_bitlen, a_bitlen; + size_t mlen, alen, u; + + m_bitlen = m[0]; + mlen = (m_bitlen + 15) >> 4; + + x[0] = m_bitlen; + if (m_bitlen == 0) { + return; + } + + /* + * If the source is shorter, then simply copy all words from a[] + * and zero out the upper words. + */ + a_bitlen = a[0]; + alen = (a_bitlen + 15) >> 4; + if (a_bitlen < m_bitlen) { + memcpy(x + 1, a + 1, alen * sizeof *a); + for (u = alen; u < mlen; u ++) { + x[u + 1] = 0; + } + return; + } + + /* + * The source length is at least equal to that of the modulus. + * We must thus copy N-1 words, and input the remaining words + * one by one. + */ + memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); + x[mlen] = 0; + for (u = 1 + alen - mlen; u > 0; u --) { + br_i15_muladd_small(x, a[u], m); + } +} + +/* see inner.h */ +void +br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b) +{ + size_t alen, blen, u; + + alen = (a[0] + 15) >> 4; + blen = (b[0] + 15) >> 4; + d[0] = a[0] + b[0]; + for (u = 0; u < blen; u ++) { + uint32_t f; + size_t v; + uint32_t cc; + + f = b[1 + u]; + cc = 0; + for (v = 0; v < alen; v ++) { + uint32_t z; + + z = (uint32_t)d[1 + u + v] + MUL15(f, a[1 + v]) + cc; + cc = z >> 15; + d[1 + u + v] = z & 0x7FFF; + } + d[1 + u + alen] = cc; + } +} diff --git a/src/int/i31_fmont.c b/src/int/i31_fmont.c index 4e14361..c24b417 100644 --- a/src/int/i31_fmont.c +++ b/src/int/i31_fmont.c @@ -35,7 +35,7 @@ br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i) uint32_t f; uint64_t cc; - f = (x[1] * m0i) & 0x7FFFFFFF; + f = MUL31_lo(x[1], m0i); cc = 0; for (v = 0; v < len; v ++) { uint64_t z; diff --git a/src/int/i31_montmul.c b/src/int/i31_montmul.c index 0857797..b953aa3 100644 --- a/src/int/i31_montmul.c +++ b/src/int/i31_montmul.c @@ -41,7 +41,7 @@ br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, uint64_t r, zh; xu = x[u + 1]; - f = ((d[1] + x[u + 1] * y[1]) * m0i) & 0x7FFFFFFF; + f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i); r = 0; for (v = 0; v < len4; v += 4) { diff --git a/src/int/i31_muladd.c b/src/int/i31_muladd.c index 6633d92..eecd9e2 100644 --- a/src/int/i31_muladd.c +++ b/src/int/i31_muladd.c @@ -75,7 +75,7 @@ br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) * * If a = b*q + r (with 0 <= r < q), we can estimate q by * doing an Euclidean division on the top words: - * a0*w+a1 = b0*u + v (with 0 <= v < w) + * a0*w+a1 = b0*u + v (with 0 <= v < b0) * Then the following holds: * 0 <= u <= w * u-2 <= q <= u diff --git a/src/rsa/rsa_i15_pkcs1_sign.c b/src/rsa/rsa_i15_pkcs1_sign.c new file mode 100644 index 0000000..f519423 --- /dev/null +++ b/src/rsa/rsa_i15_pkcs1_sign.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + const br_rsa_private_key *sk, unsigned char *x) +{ + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; + } + return br_rsa_i15_private(x, sk); +} diff --git a/src/rsa/rsa_i15_pkcs1_vrfy.c b/src/rsa/rsa_i15_pkcs1_vrfy.c new file mode 100644 index 0000000..2c35184 --- /dev/null +++ b/src/rsa/rsa_i15_pkcs1_vrfy.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen, + const unsigned char *hash_oid, size_t hash_len, + const br_rsa_public_key *pk, unsigned char *hash_out) +{ + unsigned char sig[BR_MAX_RSA_SIZE >> 3]; + + if (xlen > (sizeof sig)) { + return 0; + } + memcpy(sig, x, xlen); + if (!br_rsa_i15_public(sig, xlen, pk)) { + return 0; + } + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); +} diff --git a/src/rsa/rsa_i15_priv.c b/src/rsa/rsa_i15_priv.c new file mode 100644 index 0000000..6ecb198 --- /dev/null +++ b/src/rsa/rsa_i15_priv.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +#define U (1 + ((BR_MAX_RSA_FACTOR + 14) / 15)) + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk) +{ + const unsigned char *p, *q; + size_t plen, qlen; + uint16_t tmp[6 * U]; + uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; + uint16_t p0i, q0i; + size_t xlen; + + /* + * All our temporary buffers are from the tmp[] array. + * + * The mp, mq, s1, s2, t1 and t2 buffers are large enough to + * contain a RSA factor. The t3 buffer can contain a complete + * RSA modulus. t3 shares its storage space with s2, s1 and t1, + * in that order (this is important, see below). + */ + mq = tmp; + mp = tmp + U; + t2 = tmp + 2 * U; + s2 = tmp + 3 * U; + s1 = tmp + 4 * U; + t1 = tmp + 5 * U; + t3 = s2; + + /* + * Compute the actual lengths (in bytes) of p and q, and check + * that they fit within our stack buffers. + */ + p = sk->p; + plen = sk->plen; + while (plen > 0 && *p == 0) { + p ++; + plen --; + } + q = sk->q; + qlen = sk->qlen; + while (qlen > 0 && *q == 0) { + q ++; + qlen --; + } + if (plen > (BR_MAX_RSA_FACTOR >> 3) + || qlen > (BR_MAX_RSA_FACTOR >> 3)) + { + return 0; + } + + /* + * Decode p and q. + */ + br_i15_decode(mp, p, plen); + br_i15_decode(mq, q, qlen); + + /* + * Compute signature length (in bytes). + */ + xlen = (sk->n_bitlen + 7) >> 3; + + /* + * Compute s1 = x^dp mod p. + */ + p0i = br_i15_ninv15(mp[1]); + br_i15_decode_reduce(s1, x, xlen, mp); + br_i15_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2); + + /* + * Compute s2 = x^dq mod q. + */ + q0i = br_i15_ninv15(mq[1]); + br_i15_decode_reduce(s2, x, xlen, mq); + br_i15_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2); + + /* + * Compute: + * h = (s1 - s2)*(1/q) mod p + * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is + * unclear about whether p may be lower than q (some existing, + * widely deployed implementations of RSA don't tolerate p < q), + * but we want to support that occurrence, so we need to use the + * reduction function. + * + * Since we use br_i15_decode_reduce() for iq (purportedly, the + * inverse of q modulo p), we also tolerate improperly large + * values for this parameter. + */ + br_i15_reduce(t2, s2, mp); + br_i15_add(s1, mp, br_i15_sub(s1, t2, 1)); + br_i15_to_monty(s1, mp); + br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp); + br_i15_montymul(t2, s1, t1, mp, p0i); + + /* + * h is now in t2. We compute the final result: + * s = s2 + q*h + * All these operations are non-modular. + * + * We need mq, s2 and t2. We use the t3 buffer as destination. + * The buffers mp, s1 and t1 are no longer needed. Moreover, + * the first step is to copy s2 into the destination buffer t3. + * We thus arranged for t3 to actually share space with s2, and + * to be followed by the space formerly used by s1 and t1. + */ + br_i15_mulacc(t3, mq, t2); + + /* + * Encode the result. Since we already checked the value of xlen, + * we can just use it right away. + */ + br_i15_encode(x, xlen, t3); + + /* + * The only error conditions remaining at that point are invalid + * values for p and q (even integers). + */ + return p0i & q0i & 1; +} diff --git a/src/rsa/rsa_i15_pub.c b/src/rsa/rsa_i15_pub.c new file mode 100644 index 0000000..37ebbd1 --- /dev/null +++ b/src/rsa/rsa_i15_pub.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_i15_public(unsigned char *x, size_t xlen, + const br_rsa_public_key *pk) +{ + const unsigned char *n; + size_t nlen; + uint16_t m[1 + ((BR_MAX_RSA_SIZE + 14) / 15)]; + uint16_t a[1 + ((BR_MAX_RSA_SIZE + 14) / 15)]; + uint16_t t1[1 + ((BR_MAX_RSA_SIZE + 14) / 15)]; + uint16_t t2[1 + ((BR_MAX_RSA_SIZE + 14) / 15)]; + uint16_t m0i; + uint32_t r; + + /* + * Get the actual length of the modulus, and see if it fits within + * our stack buffer. We also check that the length of x[] is valid. + */ + n = pk->n; + nlen = pk->nlen; + while (nlen > 0 && *n == 0) { + n ++; + nlen --; + } + if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) { + return 0; + } + br_i15_decode(m, n, nlen); + m0i = br_i15_ninv15(m[1]); + + /* + * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be + * an odd integer. + */ + r = m0i & 1; + + /* + * Decode x[] into a[]; we also check that its value is proper. + */ + r &= br_i15_decode_mod(a, x, xlen, m); + + /* + * Compute the modular exponentiation. + */ + br_i15_modpow(a, pk->e, pk->elen, m, m0i, t1, t2); + + /* + * Encode the result. + */ + br_i15_encode(x, xlen, a); + return r; +} diff --git a/src/rsa/rsa_i31_pkcs1_sign.c b/src/rsa/rsa_i31_pkcs1_sign.c index 431a119..784d3c2 100644 --- a/src/rsa/rsa_i31_pkcs1_sign.c +++ b/src/rsa/rsa_i31_pkcs1_sign.c @@ -30,75 +30,8 @@ br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid, const unsigned char *hash, size_t hash_len, const br_rsa_private_key *sk, unsigned char *x) { - size_t u, x3, xlen; - - /* - * Padded hash value has format: - * 00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH - * - * with the following rules: - * - * -- Total length is equal to the modulus length (unsigned - * encoding). - * - * -- There must be at least eight bytes of value 0xFF. - * - * -- x4 is equal to the hash length (hash_len). - * - * -- x3 is equal to the encoded OID value length (hash_oid[0]). - * - * -- x2 = x3 + 4. - * - * -- x1 = x2 + x4 + 4 = x3 + x4 + 8. - * - * Note: the "05 00" is optional (signatures with and without - * that sequence exist in practice), but notes in PKCS#1 seem to - * indicate that the presence of that sequence (specifically, - * an ASN.1 NULL value for the hash parameters) may be slightly - * more "standard" than the opposite. - */ - xlen = (sk->n_bitlen + 7) >> 3; - - if (hash_oid == NULL) { - if (xlen < hash_len + 11) { - return 0; - } - x[0] = 0x00; - x[1] = 0x01; - u = xlen - hash_len; - memset(x + 2, 0xFF, u - 3); - x[u - 1] = 0x00; - } else { - x3 = hash_oid[0]; - - /* - * Check that there is enough room for all the elements, - * including at least eight bytes of value 0xFF. - */ - if (xlen < (x3 + hash_len + 21)) { - return 0; - } - x[0] = 0x00; - x[1] = 0x01; - u = xlen - x3 - hash_len - 11; - memset(x + 2, 0xFF, u - 2); - x[u] = 0x00; - x[u + 1] = 0x30; - x[u + 2] = x3 + hash_len + 8; - x[u + 3] = 0x30; - x[u + 4] = x3 + 4; - x[u + 5] = 0x06; - memcpy(x + u + 6, hash_oid, x3 + 1); - u += x3 + 7; - x[u ++] = 0x05; - x[u ++] = 0x00; - x[u ++] = 0x04; - x[u ++] = hash_len; + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; } - memcpy(x + u, hash, hash_len); - - /* - * Do the actual computation. - */ return br_rsa_i31_private(x, sk); } diff --git a/src/rsa/rsa_i31_pkcs1_vrfy.c b/src/rsa/rsa_i31_pkcs1_vrfy.c index c3ffb62..e79a002 100644 --- a/src/rsa/rsa_i31_pkcs1_vrfy.c +++ b/src/rsa/rsa_i31_pkcs1_vrfy.c @@ -30,97 +30,14 @@ br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen, const unsigned char *hash_oid, size_t hash_len, const br_rsa_public_key *pk, unsigned char *hash_out) { - static const unsigned char pad1[] = { - 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF - }; - unsigned char sig[BR_MAX_RSA_SIZE >> 3]; - unsigned char pad2[43]; - size_t u, x2, x3, pad_len, zlen; - if (xlen > (sizeof sig) || xlen < 11) { + if (xlen > (sizeof sig)) { return 0; } memcpy(sig, x, xlen); if (!br_rsa_i31_public(sig, xlen, pk)) { return 0; } - - /* - * Expected format: - * 00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH - * - * with the following rules: - * - * -- Total length is that of the modulus and the signature - * (this was already verified by br_rsa_i31_public()). - * - * -- There are at least eight bytes of value 0xFF. - * - * -- x4 is equal to the hash length (hash_len). - * - * -- x3 is equal to the encoded OID value length (so x3 is the - * first byte of hash_oid[]). - * - * -- If the "05 00" is present, then x2 == x3 + 4; otherwise, - * x2 == x3 + 2. - * - * -- x1 == x2 + x4 + 4. - * - * So the total length after the last "FF" is either x3 + x4 + 11 - * (with the "05 00") or x3 + x4 + 9 (without the "05 00"). - */ - - /* - * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes. - * The comparaison is valid because we made sure that the signature - * is at least 11 bytes long. - */ - if (memcmp(sig, pad1, sizeof pad1) != 0) { - return 0; - } - for (u = sizeof pad1; u < xlen; u ++) { - if (sig[u] != 0xFF) { - break; - } - } - - /* - * Remaining length is xlen - u bytes (including the 00 just - * after the last FF). This must be equal to one of the two - * possible values (depending on whether the "05 00" sequence is - * present or not). - */ - if (hash_oid == NULL) { - if (xlen - u != hash_len + 1 || sig[u] != 0x00) { - return 0; - } - } else { - x3 = hash_oid[0]; - pad_len = x3 + 9; - memset(pad2, 0, pad_len); - zlen = xlen - u - hash_len; - if (zlen == pad_len) { - x2 = x3 + 2; - } else if (zlen == pad_len + 2) { - x2 = x3 + 4; - pad_len = zlen; - pad2[pad_len - 4] = 0x05; - } else { - return 0; - } - pad2[1] = 0x30; - pad2[2] = x2 + hash_len + 4; - pad2[3] = 0x30; - pad2[4] = x2; - pad2[5] = 0x06; - memcpy(pad2 + 6, hash_oid, x3 + 1); - pad2[pad_len - 2] = 0x04; - pad2[pad_len - 1] = hash_len; - if (memcmp(pad2, sig + u, pad_len) != 0) { - return 0; - } - } - memcpy(hash_out, sig + xlen - hash_len, hash_len); - return 1; + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); } diff --git a/src/rsa/rsa_i32_pkcs1_sign.c b/src/rsa/rsa_i32_pkcs1_sign.c index c901bad..44b6e6d 100644 --- a/src/rsa/rsa_i32_pkcs1_sign.c +++ b/src/rsa/rsa_i32_pkcs1_sign.c @@ -30,75 +30,8 @@ br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid, const unsigned char *hash, size_t hash_len, const br_rsa_private_key *sk, unsigned char *x) { - size_t u, x3, xlen; - - /* - * Padded hash value has format: - * 00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH - * - * with the following rules: - * - * -- Total length is equal to the modulus length (unsigned - * encoding). - * - * -- There must be at least eight bytes of value 0xFF. - * - * -- x4 is equal to the hash length (hash_len). - * - * -- x3 is equal to the encoded OID value length (hash_oid[0]). - * - * -- x2 = x3 + 4. - * - * -- x1 = x2 + x4 + 4 = x3 + x4 + 8. - * - * Note: the "05 00" is optional (signatures with and without - * that sequence exist in practice), but notes in PKCS#1 seem to - * indicate that the presence of that sequence (specifically, - * an ASN.1 NULL value for the hash parameters) may be slightly - * more "standard" than the opposite. - */ - xlen = (sk->n_bitlen + 7) >> 3; - - if (hash_oid == NULL) { - if (xlen < hash_len + 11) { - return 0; - } - x[0] = 0x00; - x[1] = 0x01; - u = xlen - hash_len; - memset(x + 2, 0xFF, u - 3); - x[u - 1] = 0x00; - } else { - x3 = hash_oid[0]; - - /* - * Check that there is enough room for all the elements, - * including at least eight bytes of value 0xFF. - */ - if (xlen < (x3 + hash_len + 21)) { - return 0; - } - x[0] = 0x00; - x[1] = 0x01; - u = xlen - x3 - hash_len - 11; - memset(x + 2, 0xFF, u - 2); - x[u] = 0x00; - x[u + 1] = 0x30; - x[u + 2] = x3 + hash_len + 8; - x[u + 3] = 0x30; - x[u + 4] = x3 + 4; - x[u + 5] = 0x06; - memcpy(x + u + 6, hash_oid, x3 + 1); - u += x3 + 7; - x[u ++] = 0x05; - x[u ++] = 0x00; - x[u ++] = 0x04; - x[u ++] = hash_len; + if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) { + return 0; } - memcpy(x + u, hash, hash_len); - - /* - * Do the actual computation. - */ return br_rsa_i32_private(x, sk); } diff --git a/src/rsa/rsa_i32_pkcs1_vrfy.c b/src/rsa/rsa_i32_pkcs1_vrfy.c index cc20ba8..6ee7a19 100644 --- a/src/rsa/rsa_i32_pkcs1_vrfy.c +++ b/src/rsa/rsa_i32_pkcs1_vrfy.c @@ -30,97 +30,14 @@ br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen, const unsigned char *hash_oid, size_t hash_len, const br_rsa_public_key *pk, unsigned char *hash_out) { - static const unsigned char pad1[] = { - 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF - }; - unsigned char sig[BR_MAX_RSA_SIZE >> 3]; - unsigned char pad2[43]; - size_t u, x2, x3, pad_len, zlen; - if (xlen > (sizeof sig) || xlen < 11) { + if (xlen > (sizeof sig)) { return 0; } memcpy(sig, x, xlen); if (!br_rsa_i32_public(sig, xlen, pk)) { return 0; } - - /* - * Expected format: - * 00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH - * - * with the following rules: - * - * -- Total length is that of the modulus and the signature - * (this was already verified by br_rsa_i32_public()). - * - * -- There are at least eight bytes of value 0xFF. - * - * -- x4 is equal to the hash length (hash_len). - * - * -- x3 is equal to the encoded OID value length (so x3 is the - * first byte of hash_oid[]). - * - * -- If the "05 00" is present, then x2 == x3 + 4; otherwise, - * x2 == x3 + 2. - * - * -- x1 == x2 + x4 + 4. - * - * So the total length after the last "FF" is either x3 + x4 + 11 - * (with the "05 00") or x3 + x4 + 9 (without the "05 00"). - */ - - /* - * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes. - * The comparaison is valid because we made sure that the signature - * is at least 11 bytes long. - */ - if (memcmp(sig, pad1, sizeof pad1) != 0) { - return 0; - } - for (u = sizeof pad1; u < xlen; u ++) { - if (sig[u] != 0xFF) { - break; - } - } - - /* - * Remaining length is xlen - u bytes (including the 00 just - * after the last FF). This must be equal to one of the two - * possible values (depending on whether the "05 00" sequence is - * present or not). - */ - if (hash_oid == NULL) { - if (xlen - u != hash_len + 1 || sig[u] != 0x00) { - return 0; - } - } else { - x3 = hash_oid[0]; - pad_len = x3 + 9; - memset(pad2, 0, pad_len); - zlen = xlen - u - hash_len; - if (zlen == pad_len) { - x2 = x3 + 2; - } else if (zlen == pad_len + 2) { - x2 = x3 + 4; - pad_len = zlen; - pad2[pad_len - 4] = 0x05; - } else { - return 0; - } - pad2[1] = 0x30; - pad2[2] = x2 + hash_len + 4; - pad2[3] = 0x30; - pad2[4] = x2; - pad2[5] = 0x06; - memcpy(pad2 + 6, hash_oid, x3 + 1); - pad2[pad_len - 2] = 0x04; - pad2[pad_len - 1] = hash_len; - if (memcmp(pad2, sig + u, pad_len) != 0) { - return 0; - } - } - memcpy(hash_out, sig + xlen - hash_len, hash_len); - return 1; + return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out); } diff --git a/src/rsa/rsa_pkcs1_sig_pad.c b/src/rsa/rsa_pkcs1_sig_pad.c new file mode 100644 index 0000000..06c3bd7 --- /dev/null +++ b/src/rsa/rsa_pkcs1_sig_pad.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see inner.h */ +uint32_t +br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + uint32_t n_bitlen, unsigned char *x) +{ + size_t u, x3, xlen; + + /* + * Padded hash value has format: + * 00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH + * + * with the following rules: + * + * -- Total length is equal to the modulus length (unsigned + * encoding). + * + * -- There must be at least eight bytes of value 0xFF. + * + * -- x4 is equal to the hash length (hash_len). + * + * -- x3 is equal to the encoded OID value length (hash_oid[0]). + * + * -- x2 = x3 + 4. + * + * -- x1 = x2 + x4 + 4 = x3 + x4 + 8. + * + * Note: the "05 00" is optional (signatures with and without + * that sequence exist in practice), but notes in PKCS#1 seem to + * indicate that the presence of that sequence (specifically, + * an ASN.1 NULL value for the hash parameters) may be slightly + * more "standard" than the opposite. + */ + xlen = (n_bitlen + 7) >> 3; + + if (hash_oid == NULL) { + if (xlen < hash_len + 11) { + return 0; + } + x[0] = 0x00; + x[1] = 0x01; + u = xlen - hash_len; + memset(x + 2, 0xFF, u - 3); + x[u - 1] = 0x00; + } else { + x3 = hash_oid[0]; + + /* + * Check that there is enough room for all the elements, + * including at least eight bytes of value 0xFF. + */ + if (xlen < (x3 + hash_len + 21)) { + return 0; + } + x[0] = 0x00; + x[1] = 0x01; + u = xlen - x3 - hash_len - 11; + memset(x + 2, 0xFF, u - 2); + x[u] = 0x00; + x[u + 1] = 0x30; + x[u + 2] = x3 + hash_len + 8; + x[u + 3] = 0x30; + x[u + 4] = x3 + 4; + x[u + 5] = 0x06; + memcpy(x + u + 6, hash_oid, x3 + 1); + u += x3 + 7; + x[u ++] = 0x05; + x[u ++] = 0x00; + x[u ++] = 0x04; + x[u ++] = hash_len; + } + memcpy(x + u, hash, hash_len); + return 1; +} diff --git a/src/rsa/rsa_pkcs1_sig_unpad.c b/src/rsa/rsa_pkcs1_sig_unpad.c new file mode 100644 index 0000000..d179945 --- /dev/null +++ b/src/rsa/rsa_pkcs1_sig_unpad.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* see bearssl_rsa.h */ +uint32_t +br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len, + const unsigned char *hash_oid, size_t hash_len, + unsigned char *hash_out) +{ + static const unsigned char pad1[] = { + 0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF + }; + + unsigned char pad2[43]; + size_t u, x2, x3, pad_len, zlen; + + if (sig_len < 11) { + return 0; + } + + /* + * Expected format: + * 00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH + * + * with the following rules: + * + * -- Total length is that of the modulus and the signature + * (this was already verified by br_rsa_i31_public()). + * + * -- There are at least eight bytes of value 0xFF. + * + * -- x4 is equal to the hash length (hash_len). + * + * -- x3 is equal to the encoded OID value length (so x3 is the + * first byte of hash_oid[]). + * + * -- If the "05 00" is present, then x2 == x3 + 4; otherwise, + * x2 == x3 + 2. + * + * -- x1 == x2 + x4 + 4. + * + * So the total length after the last "FF" is either x3 + x4 + 11 + * (with the "05 00") or x3 + x4 + 9 (without the "05 00"). + */ + + /* + * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes. + * The comparaison is valid because we made sure that the signature + * is at least 11 bytes long. + */ + if (memcmp(sig, pad1, sizeof pad1) != 0) { + return 0; + } + for (u = sizeof pad1; u < sig_len; u ++) { + if (sig[u] != 0xFF) { + break; + } + } + + /* + * Remaining length is sig_len - u bytes (including the 00 just + * after the last FF). This must be equal to one of the two + * possible values (depending on whether the "05 00" sequence is + * present or not). + */ + if (hash_oid == NULL) { + if (sig_len - u != hash_len + 1 || sig[u] != 0x00) { + return 0; + } + } else { + x3 = hash_oid[0]; + pad_len = x3 + 9; + memset(pad2, 0, pad_len); + zlen = sig_len - u - hash_len; + if (zlen == pad_len) { + x2 = x3 + 2; + } else if (zlen == pad_len + 2) { + x2 = x3 + 4; + pad_len = zlen; + pad2[pad_len - 4] = 0x05; + } else { + return 0; + } + pad2[1] = 0x30; + pad2[2] = x2 + hash_len + 4; + pad2[3] = 0x30; + pad2[4] = x2; + pad2[5] = 0x06; + memcpy(pad2 + 6, hash_oid, x3 + 1); + pad2[pad_len - 2] = 0x04; + pad2[pad_len - 1] = hash_len; + if (memcmp(pad2, sig + u, pad_len) != 0) { + return 0; + } + } + memcpy(hash_out, sig + sig_len - hash_len, hash_len); + return 1; +} diff --git a/test/test_crypto.c b/test/test_crypto.c index 10faa9c..bed4927 100644 --- a/test/test_crypto.c +++ b/test/test_crypto.c @@ -4367,6 +4367,14 @@ test_RSA_sign(const char *name, br_rsa_private fpriv, fflush(stdout); } +static void +test_RSA_i15(void) +{ + test_RSA_core("RSA i15 core", &br_rsa_i15_public, &br_rsa_i15_private); + test_RSA_sign("RSA i15 sign", &br_rsa_i15_private, + &br_rsa_i15_pkcs1_sign, &br_rsa_i15_pkcs1_vrfy); +} + static void test_RSA_i31(void) { @@ -4813,6 +4821,15 @@ test_EC_KAT(const char *name, const br_ec_impl *impl, uint32_t curve_mask) fflush(stdout); } +static void +test_EC_prime_i15(void) +{ + test_EC_KAT("EC_prime_i15", &br_ec_prime_i15, + (uint32_t)1 << BR_EC_secp256r1 + | (uint32_t)1 << BR_EC_secp384r1 + | (uint32_t)1 << BR_EC_secp521r1); +} + static void test_EC_prime_i31(void) { @@ -5201,7 +5218,8 @@ const ecdsa_kat_vector ECDSA_KAT[] = { }; static void -test_ECDSA_KAT(br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1) +test_ECDSA_KAT(const br_ec_impl *iec, + br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1) { size_t u; @@ -5228,28 +5246,28 @@ test_ECDSA_KAT(br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1) sig_len = hextobin(sig, kv->sraw); } - if (vrfy(&br_ec_prime_i31, hash, hash_len, + if (vrfy(iec, hash, hash_len, kv->pub, sig, sig_len) != 1) { fprintf(stderr, "ECDSA KAT verify failed (1)\n"); exit(EXIT_FAILURE); } hash[0] ^= 0x80; - if (vrfy(&br_ec_prime_i31, hash, hash_len, + if (vrfy(iec, hash, hash_len, kv->pub, sig, sig_len) != 0) { fprintf(stderr, "ECDSA KAT verify shoud have failed\n"); exit(EXIT_FAILURE); } hash[0] ^= 0x80; - if (vrfy(&br_ec_prime_i31, hash, hash_len, + if (vrfy(iec, hash, hash_len, kv->pub, sig, sig_len) != 1) { fprintf(stderr, "ECDSA KAT verify failed (2)\n"); exit(EXIT_FAILURE); } - sig2_len = sign(&br_ec_prime_i31, kv->hf, hash, kv->priv, sig2); + sig2_len = sign(iec, kv->hf, hash, kv->priv, sig2); if (sig2_len == 0) { fprintf(stderr, "ECDSA KAT sign failed\n"); exit(EXIT_FAILURE); @@ -5271,10 +5289,29 @@ test_ECDSA_i31(void) fflush(stdout); printf("[raw]"); fflush(stdout); - test_ECDSA_KAT(&br_ecdsa_i31_sign_raw, &br_ecdsa_i31_vrfy_raw, 0); + test_ECDSA_KAT(&br_ec_prime_i31, + &br_ecdsa_i31_sign_raw, &br_ecdsa_i31_vrfy_raw, 0); + printf(" [asn1]"); + fflush(stdout); + test_ECDSA_KAT(&br_ec_prime_i31, + &br_ecdsa_i31_sign_asn1, &br_ecdsa_i31_vrfy_asn1, 1); + printf(" done.\n"); + fflush(stdout); +} + +static void +test_ECDSA_i15(void) +{ + printf("Test ECDSA/i15: "); + fflush(stdout); + printf("[raw]"); + fflush(stdout); + test_ECDSA_KAT(&br_ec_prime_i15, + &br_ecdsa_i15_sign_raw, &br_ecdsa_i15_vrfy_raw, 0); printf(" [asn1]"); fflush(stdout); - test_ECDSA_KAT(&br_ecdsa_i31_sign_asn1, &br_ecdsa_i31_vrfy_asn1, 1); + test_ECDSA_KAT(&br_ec_prime_i31, + &br_ecdsa_i15_sign_asn1, &br_ecdsa_i15_vrfy_asn1, 1); printf(" done.\n"); fflush(stdout); } @@ -5343,14 +5380,17 @@ static const struct { STU(DES_ct), STU(ChaCha20_ct), STU(Poly1305_ctmul), + STU(RSA_i15), STU(RSA_i31), STU(RSA_i32), STU(GHASH_ctmul), STU(GHASH_ctmul32), STU(GHASH_ctmul64), + STU(EC_prime_i15), STU(EC_prime_i31), STU(EC_p256_i15), /* STU(EC_prime_i32), */ + STU(ECDSA_i15), STU(ECDSA_i31), { 0, 0 } }; diff --git a/test/test_speed.c b/test/test_speed.c index fccb5eb..5e17714 100644 --- a/test/test_speed.c +++ b/test/test_speed.c @@ -557,6 +557,13 @@ test_speed_rsa_inner(char *name, } } +static void +test_speed_rsa_i15(void) +{ + test_speed_rsa_inner("RSA i15", + &br_rsa_i15_public, &br_rsa_i15_private); +} + static void test_speed_rsa_i31(void) { @@ -616,7 +623,16 @@ test_speed_ec_inner(const char *name, static void test_speed_ec_p256_i15(void) { - test_speed_ec_inner("EC i15 P-256", &br_ec_p256_i15, &br_secp256r1); + test_speed_ec_inner("EC i15/spec P-256", + &br_ec_p256_i15, &br_secp256r1); +} + +static void +test_speed_ec_prime_i15(void) +{ + test_speed_ec_inner("EC i15 P-256", &br_ec_prime_i15, &br_secp256r1); + test_speed_ec_inner("EC i15 P-384", &br_ec_prime_i15, &br_secp384r1); + test_speed_ec_inner("EC i15 P-521", &br_ec_prime_i15, &br_secp521r1); } static void @@ -712,6 +728,23 @@ test_speed_ecdsa_inner(const char *name, } } +static void +test_speed_ecdsa_i15(void) +{ + test_speed_ecdsa_inner("ECDSA i15 P-256", + &br_ec_prime_i15, &br_secp256r1, + &br_ecdsa_i15_sign_asn1, + &br_ecdsa_i15_vrfy_asn1); + test_speed_ecdsa_inner("ECDSA i15 P-384", + &br_ec_prime_i15, &br_secp384r1, + &br_ecdsa_i15_sign_asn1, + &br_ecdsa_i15_vrfy_asn1); + test_speed_ecdsa_inner("ECDSA i15 P-521", + &br_ec_prime_i15, &br_secp521r1, + &br_ecdsa_i15_sign_asn1, + &br_ecdsa_i15_vrfy_asn1); +} + static void test_speed_ecdsa_i31(void) { @@ -1134,10 +1167,13 @@ static const struct { STU(poly1305_ctmul), + STU(rsa_i15), STU(rsa_i31), STU(rsa_i32), STU(ec_p256_i15), + STU(ec_prime_i15), STU(ec_prime_i31), + STU(ecdsa_i15), STU(ecdsa_i31), STU(i31)