From: Thomas Pornin <pornin@bolet.org>
Date: Mon, 23 Oct 2017 21:27:28 +0000 (+0200)
Subject: Added generic EAX and CCM implementations.
X-Git-Tag: v0.6~20
X-Git-Url: https://bearssl.org/gitweb//home/git/?a=commitdiff_plain;h=dddc412922f42f9c7dd6177133828be724f44424;p=BearSSL

Added generic EAX and CCM implementations.
---

diff --git a/inc/bearssl_aead.h b/inc/bearssl_aead.h
index 09cb9e8..b1e52a3 100644
--- a/inc/bearssl_aead.h
+++ b/inc/bearssl_aead.h
@@ -127,7 +127,7 @@ extern "C" {
  *
  *   - Nonce, plaintext and additional authenticated data all consist
  *     in an integral number of bytes. There is no provision to use
- *     elements whose lengh in bits is not a multiple of 8.
+ *     elements whose length in bits is not a multiple of 8.
  *
  * Each AEAD algorithm has its own requirements and limits on the sizes
  * of additional data and plaintext. This API does not provide any
@@ -169,6 +169,9 @@ extern "C" {
  * Note that there is no OOP method for context initialisation: the
  * various AEAD algorithms have different requirements that would not
  * map well to a single initialisation API.
+ *
+ * The OOP API is not provided for CCM, due to its specific requirements
+ * (length of plaintext must be known in advance).
  */
 
 /**
@@ -213,7 +216,7 @@ struct br_aead_class_ {
 	 *
 	 * \param cc     AEAD context structure.
 	 * \param data   pointer to additional authenticated data.
-	 * \param len    length of additiona authenticated data (in bytes).
+	 * \param len    length of additional authenticated data (in bytes).
 	 */
 	void (*aad_inject)(const br_aead_class **cc,
 		const void *data, size_t len);
@@ -266,6 +269,8 @@ struct br_aead_class_ {
 	 * `check_tag()` function may be used to compute and check the
 	 * tag value.
 	 *
+	 * Tag length depends on the AEAD algorithm.
+	 *
 	 * \param cc    AEAD context structure.
 	 * \param tag   destination buffer for the tag.
 	 */
@@ -282,11 +287,44 @@ struct br_aead_class_ {
 	 * data or the tag was altered in transit, normally leading to
 	 * wholesale rejection of the complete message.
 	 *
+	 * Tag length depends on the AEAD algorithm.
+	 *
 	 * \param cc    AEAD context structure.
-	 * \param tag   tag value to compare with (16 bytes).
+	 * \param tag   tag value to compare with.
 	 * \return  1 on success (exact match of tag value), 0 otherwise.
 	 */
 	uint32_t (*check_tag)(const br_aead_class **cc, const void *tag);
+
+	/**
+	 * \brief Compute authentication tag (with truncation).
+	 *
+	 * This function is similar to `get_tag()`, except that the tag
+	 * length is provided. Some AEAD algorithms allow several tag
+	 * lengths, usually by truncating the normal tag. Shorter tags
+	 * mechanically increase success probability of forgeries.
+	 * The range of allowed tag lengths depends on the algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   destination buffer for the tag.
+	 * \param len   tag length (in bytes).
+	 */
+	void (*get_tag_trunc)(const br_aead_class **cc, void *tag, size_t len);
+
+	/**
+	 * \brief Compute and check authentication tag (with truncation).
+	 *
+	 * This function is similar to `check_tag()` except that it
+	 * works over an explicit tag length. See `get_tag_trunc()` for a
+	 * discussion of explicit tag lengths; the range of allowed tag
+	 * lengths depends on the algorithm.
+	 *
+	 * \param cc    AEAD context structure.
+	 * \param tag   tag value to compare with.
+	 * \param len   tag length (in bytes).
+	 * \return  1 on success (exact match of tag value), 0 otherwise.
+	 */
+	uint32_t (*check_tag_trunc)(const br_aead_class **cc,
+		const void *tag, size_t len);
 };
 
 /**
@@ -379,7 +417,7 @@ void br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len);
  *
  * \param ctx    GCM context structure.
  * \param data   pointer to additional authenticated data.
- * \param len    length of additiona authenticated data (in bytes).
+ * \param len    length of additional authenticated data (in bytes).
  */
 void br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len);
 
@@ -449,11 +487,478 @@ void br_gcm_get_tag(br_gcm_context *ctx, void *tag);
  */
 uint32_t br_gcm_check_tag(br_gcm_context *ctx, const void *tag);
 
+/**
+ * \brief Compute GCM authentication tag (with truncation).
+ *
+ * This function is similar to `br_gcm_get_tag()`, except that it allows
+ * the tag to be truncated to a smaller length. The intended tag length
+ * is provided as `len` (in bytes); it MUST be no more than 16, but
+ * it may be smaller. Note that decreasing tag length mechanically makes
+ * forgeries easier; NIST SP 800-38D specifies that the tag length shall
+ * lie between 12 and 16 bytes (inclusive), but may be truncated down to
+ * 4 or 8 bytes, for specific applications that can tolerate it. It must
+ * also be noted that successful forgeries leak information on the
+ * authentication key, making subsequent forgeries easier. Therefore,
+ * tag truncation, and in particular truncation to sizes lower than 12
+ * bytes, shall be envisioned only with great care.
+ *
+ * The tag is written in the provided `tag` buffer. This call terminates
+ * the GCM run: no data may be processed with that GCM context
+ * afterwards, until `br_gcm_reset()` is called to initiate a new GCM
+ * run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_gcm_check_tag_trunc()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   destination buffer for the tag.
+ * \param len   tag length (16 bytes or less).
+ */
+void br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len);
+
+/**
+ * \brief Compute and check GCM authentication tag (with truncation).
+ *
+ * This function is an alternative to `br_gcm_get_tag_trunc()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * Tag length MUST be 16 bytes or less. The normal GCM tag length is 16
+ * bytes. See `br_gcm_get_tag_trunc()` for some discussion on the potential
+ * perils of truncating authentication tags.
+ *
+ * \param ctx   GCM context structure.
+ * \param tag   tag value to compare with.
+ * \param len   tag length (in bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_gcm_check_tag_trunc(br_gcm_context *ctx,
+	const void *tag, size_t len);
+
 /**
  * \brief Class instance for GCM.
  */
 extern const br_aead_class br_gcm_vtable;
 
+/**
+ * \brief Context structure for EAX.
+ *
+ * EAX is an AEAD mode that combines a block cipher in CTR mode with
+ * CBC-MAC using the same block cipher and the same key, to provide
+ * authenticated encryption:
+ *
+ *   - Any block cipher with 16-byte blocks can be used with EAX
+ *     (technically, other block sizes are defined as well, but this
+ *     is not implemented by these functions; shorter blocks also
+ *     imply numerous security issues).
+ *
+ *   - The nonce can have any length, as long as nonce values are
+ *     not reused (thus, if nonces are randomly selected, the nonce
+ *     size should be such that reuse probability is negligible).
+ *
+ *   - Additional authenticated data length is unlimited.
+ *
+ *   - Message length is unlimited.
+ *
+ *   - The authentication tag has length 16 bytes.
+ *
+ * The EAX initialisation function receives as parameter an
+ * _initialised_ block cipher implementation context, with the secret
+ * key already set. A pointer to that context will be kept within the
+ * EAX context structure. It is up to the caller to allocate and
+ * initialise that block cipher context.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_aead_class *vtable;
+
+#ifndef BR_DOXYGEN_IGNORE
+	const br_block_ctrcbc_class **bctx;
+	unsigned char L2[16];
+	unsigned char L4[16];
+	unsigned char nonce[16];
+	unsigned char head[16];
+	unsigned char ctr[16];
+	unsigned char cbcmac[16];
+	unsigned char buf[16];
+	size_t ptr;
+#endif
+} br_eax_context;
+
+/**
+ * \brief Initialize an EAX context.
+ *
+ * A block cipher implementation, with its initialised context
+ * structure, is provided. The block cipher MUST use 16-byte blocks in
+ * CTR + CBC-MAC mode, and its secret key MUST have been already set in
+ * the provided context. The parameters are linked in the EAX context.
+ *
+ * After this function has been called, the `br_eax_reset()` function must
+ * be called, to provide the nonce for EAX computation.
+ *
+ * \param ctx    EAX context structure.
+ * \param bctx   block cipher context (already initialised with secret key).
+ */
+void br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx);
+
+/**
+ * \brief Reset an EAX context.
+ *
+ * This function resets an already initialised EAX context for a new
+ * computation run. Implementations and keys are conserved. This function
+ * can be called at any time; it cancels any ongoing EAX computation that
+ * uses the provided context structure.
+ *
+ * It is critical to EAX security that nonce values are not repeated for
+ * the same encryption key. Nonces can have arbitrary length. If nonces
+ * are randomly generated, then a nonce length of at least 128 bits (16
+ * bytes) is recommended, to make nonce reuse probability sufficiently
+ * low.
+ *
+ * \param ctx     EAX context structure.
+ * \param nonce   EAX nonce to use.
+ * \param len     EAX nonce length (in bytes).
+ */
+void br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len);
+
+/**
+ * \brief Inject additional authenticated data into EAX.
+ *
+ * The provided data is injected into a running EAX computation. Additional
+ * data must be injected _before_ the call to `br_eax_flip()`.
+ * Additional data can be injected in several chunks of arbitrary length;
+ * the total amount of additional authenticated data is unlimited.
+ *
+ * \param ctx    EAX context structure.
+ * \param data   pointer to additional authenticated data.
+ * \param len    length of additional authenticated data (in bytes).
+ */
+void br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Finish injection of additional authenticated data into EAX.
+ *
+ * This function MUST be called before beginning the actual encryption
+ * or decryption (with `br_eax_run()`), even if no additional authenticated
+ * data was injected. No additional authenticated data may be injected
+ * after this function call.
+ *
+ * \param ctx   EAX context structure.
+ */
+void br_eax_flip(br_eax_context *ctx);
+
+/**
+ * \brief Encrypt or decrypt some data with EAX.
+ *
+ * Data encryption or decryption can be done after `br_eax_flip()`
+ * has been called on the context. If `encrypt` is non-zero, then the
+ * provided data shall be plaintext, and it is encrypted in place.
+ * Otherwise, the data shall be ciphertext, and it is decrypted in place.
+ *
+ * Data may be provided in several chunks of arbitrary length.
+ *
+ * \param ctx       EAX context structure.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ */
+void br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len);
+
+/**
+ * \brief Compute EAX authentication tag.
+ *
+ * Compute the EAX authentication tag. The tag is a 16-byte value which
+ * is written in the provided `tag` buffer. This call terminates the
+ * EAX run: no data may be processed with that EAX context afterwards,
+ * until `br_eax_reset()` is called to initiate a new EAX run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_eax_check_tag()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   destination buffer for the tag (16 bytes).
+ */
+void br_eax_get_tag(br_eax_context *ctx, void *tag);
+
+/**
+ * \brief Compute and check EAX authentication tag.
+ *
+ * This function is an alternative to `br_eax_get_tag()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   tag value to compare with (16 bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_eax_check_tag(br_eax_context *ctx, const void *tag);
+
+/**
+ * \brief Compute EAX authentication tag (with truncation).
+ *
+ * This function is similar to `br_eax_get_tag()`, except that it allows
+ * the tag to be truncated to a smaller length. The intended tag length
+ * is provided as `len` (in bytes); it MUST be no more than 16, but
+ * it may be smaller. Note that decreasing tag length mechanically makes
+ * forgeries easier; NIST SP 800-38D specifies that the tag length shall
+ * lie between 12 and 16 bytes (inclusive), but may be truncated down to
+ * 4 or 8 bytes, for specific applications that can tolerate it. It must
+ * also be noted that successful forgeries leak information on the
+ * authentication key, making subsequent forgeries easier. Therefore,
+ * tag truncation, and in particular truncation to sizes lower than 12
+ * bytes, shall be envisioned only with great care.
+ *
+ * The tag is written in the provided `tag` buffer. This call terminates
+ * the EAX run: no data may be processed with that EAX context
+ * afterwards, until `br_eax_reset()` is called to initiate a new EAX
+ * run.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_eax_check_tag_trunc()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   destination buffer for the tag.
+ * \param len   tag length (16 bytes or less).
+ */
+void br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len);
+
+/**
+ * \brief Compute and check EAX authentication tag (with truncation).
+ *
+ * This function is an alternative to `br_eax_get_tag_trunc()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * Tag length MUST be 16 bytes or less. The normal EAX tag length is 16
+ * bytes. See `br_eax_get_tag_trunc()` for some discussion on the potential
+ * perils of truncating authentication tags.
+ *
+ * \param ctx   EAX context structure.
+ * \param tag   tag value to compare with.
+ * \param len   tag length (in bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_eax_check_tag_trunc(br_eax_context *ctx,
+	const void *tag, size_t len);
+
+/**
+ * \brief Class instance for EAX.
+ */
+extern const br_aead_class br_eax_vtable;
+
+/**
+ * \brief Context structure for CCM.
+ *
+ * CCM is an AEAD mode that combines a block cipher in CTR mode with
+ * CBC-MAC using the same block cipher and the same key, to provide
+ * authenticated encryption:
+ *
+ *   - Any block cipher with 16-byte blocks can be used with CCM
+ *     (technically, other block sizes are defined as well, but this
+ *     is not implemented by these functions; shorter blocks also
+ *     imply numerous security issues).
+ *
+ *   - The authentication tag length, and plaintext length, MUST be
+ *     known when starting processing data. Plaintext and ciphertext
+ *     can still be provided by chunks, but the total size must match
+ *     the value provided upon initialisation.
+ *
+ *   - The nonce length is constrained between 7 and 13 bytes (inclusive).
+ *     Furthermore, the plaintext length, when encoded, must fit over
+ *     15-nonceLen bytes; thus, if the nonce has length 13 bytes, then
+ *     the plaintext length cannot exceed 65535 bytes.
+ *
+ *   - Additional authenticated data length is practically unlimited
+ *     (formal limit is at 2^64 bytes).
+ *
+ *   - The authentication tag has length 4 to 16 bytes (even values only).
+ *
+ * The CCM initialisation function receives as parameter an
+ * _initialised_ block cipher implementation context, with the secret
+ * key already set. A pointer to that context will be kept within the
+ * CCM context structure. It is up to the caller to allocate and
+ * initialise that block cipher context.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	const br_block_ctrcbc_class **bctx;
+	unsigned char ctr[16];
+	unsigned char cbcmac[16];
+	unsigned char tagmask[16];
+	unsigned char buf[16];
+	size_t ptr;
+	size_t tag_len;
+#endif
+} br_ccm_context;
+
+/**
+ * \brief Initialize a CCM context.
+ *
+ * A block cipher implementation, with its initialised context
+ * structure, is provided. The block cipher MUST use 16-byte blocks in
+ * CTR + CBC-MAC mode, and its secret key MUST have been already set in
+ * the provided context. The parameters are linked in the CCM context.
+ *
+ * After this function has been called, the `br_ccm_reset()` function must
+ * be called, to provide the nonce for CCM computation.
+ *
+ * \param ctx    CCM context structure.
+ * \param bctx   block cipher context (already initialised with secret key).
+ */
+void br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx);
+
+/**
+ * \brief Reset a CCM context.
+ *
+ * This function resets an already initialised CCM context for a new
+ * computation run. Implementations and keys are conserved. This function
+ * can be called at any time; it cancels any ongoing CCM computation that
+ * uses the provided context structure.
+ *
+ * The `aad_len` parameter contains the total length, in bytes, of the
+ * additional authenticated data. It may be zero. That length MUST be
+ * exact.
+ *
+ * The `data_len` parameter contains the total length, in bytes, of the
+ * data that will be injected (plaintext or ciphertext). That length MUST
+ * be exact. Moreover, that length MUST be less than 2^(8*(15-nonce_len)).
+ *
+ * The nonce length (`nonce_len`), in bytes, must be in the 7..13 range
+ * (inclusive).
+ *
+ * The tag length (`tag_len`), in bytes, must be in the 4..16 range, and
+ * be an even integer. Short tags mechanically allow for higher forgery
+ * probabilities; hence, tag sizes smaller than 12 bytes shall be used only
+ * with care.
+ *
+ * It is critical to CCM security that nonce values are not repeated for
+ * the same encryption key. Random generation of nonces is not generally
+ * recommended, due to the relatively small maximum nonce length.
+ *
+ * Returned value is 1 on success, 0 on error. An error is reported if
+ * the tag or nonce length is out of range, or if the
+ * plaintext/ciphertext length cannot be encoded with the specified
+ * nonce length.
+ *
+ * \param ctx         CCM context structure.
+ * \param nonce       CCM nonce to use.
+ * \param nonce_len   CCM nonce length (in bytes, 7 to 13).
+ * \param aad_len     additional authenticated data length (in bytes).
+ * \param data_len    plaintext/ciphertext length (in bytes).
+ * \param tag_len     tag length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+int br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len);
+
+/**
+ * \brief Inject additional authenticated data into CCM.
+ *
+ * The provided data is injected into a running CCM computation. Additional
+ * data must be injected _before_ the call to `br_ccm_flip()`.
+ * Additional data can be injected in several chunks of arbitrary length,
+ * but the total amount MUST exactly match the value which was provided
+ * to `br_ccm_reset()`.
+ *
+ * \param ctx    CCM context structure.
+ * \param data   pointer to additional authenticated data.
+ * \param len    length of additional authenticated data (in bytes).
+ */
+void br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Finish injection of additional authenticated data into CCM.
+ *
+ * This function MUST be called before beginning the actual encryption
+ * or decryption (with `br_ccm_run()`), even if no additional authenticated
+ * data was injected. No additional authenticated data may be injected
+ * after this function call.
+ *
+ * \param ctx   CCM context structure.
+ */
+void br_ccm_flip(br_ccm_context *ctx);
+
+/**
+ * \brief Encrypt or decrypt some data with CCM.
+ *
+ * Data encryption or decryption can be done after `br_ccm_flip()`
+ * has been called on the context. If `encrypt` is non-zero, then the
+ * provided data shall be plaintext, and it is encrypted in place.
+ * Otherwise, the data shall be ciphertext, and it is decrypted in place.
+ *
+ * Data may be provided in several chunks of arbitrary length, provided
+ * that the total length exactly matches the length provided to the
+ * `br_ccm_reset()` call.
+ *
+ * \param ctx       CCM context structure.
+ * \param encrypt   non-zero for encryption, zero for decryption.
+ * \param data      data to encrypt or decrypt.
+ * \param len       data length (in bytes).
+ */
+void br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len);
+
+/**
+ * \brief Compute CCM authentication tag.
+ *
+ * Compute the CCM authentication tag. This call terminates the CCM
+ * run: all data must have been injected with `br_ccm_run()` (in zero,
+ * one or more successive calls). After this function has been called,
+ * no more data can be processed; a `br_ccm_reset()` call is required
+ * to start a new message.
+ *
+ * The tag length was provided upon context initialisation (last call
+ * to `br_ccm_reset()`); it is returned by this function.
+ *
+ * The tag value must normally be sent along with the encrypted data.
+ * When decrypting, the tag value must be recomputed and compared with
+ * the received tag: if the two tag values differ, then either the tag
+ * or the encrypted data was altered in transit. As an alternative to
+ * this function, the `br_ccm_check_tag()` function can be used to
+ * compute and check the tag value.
+ *
+ * \param ctx   CCM context structure.
+ * \param tag   destination buffer for the tag (up to 16 bytes).
+ * \return  the tag length (in bytes).
+ */
+size_t br_ccm_get_tag(br_ccm_context *ctx, void *tag);
+
+/**
+ * \brief Compute and check CCM authentication tag.
+ *
+ * This function is an alternative to `br_ccm_get_tag()`, normally used
+ * on the receiving end (i.e. when decrypting value). The tag value is
+ * recomputed and compared with the provided tag value. If they match, 1
+ * is returned; on mismatch, 0 is returned. A returned value of 0 means
+ * that the data or the tag was altered in transit, normally leading to
+ * wholesale rejection of the complete message.
+ *
+ * \param ctx   CCM context structure.
+ * \param tag   tag value to compare with (up to 16 bytes).
+ * \return  1 on success (exact match of tag value), 0 otherwise.
+ */
+uint32_t br_ccm_check_tag(br_ccm_context *ctx, const void *tag);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/inc/bearssl_block.h b/inc/bearssl_block.h
index 24f09ac..4772779 100644
--- a/inc/bearssl_block.h
+++ b/inc/bearssl_block.h
@@ -136,6 +136,73 @@ extern "C" {
  *     chunked processing, provided that each chunk length (except possibly
  *     the last one) is a multiple of the block size.
  *
+ *   - `br_xxx_ctrcbc_keys`
+ *
+ *     Context structure that contains the subkeys resulting from the
+ *     key expansion. These subkeys are appropriate for doing combined
+ *     CTR encryption/decryption and CBC-MAC, as used in the CCM and EAX
+ *     authenticated encryption modes. The structure first field is
+ *     called `vtable` and points to the appropriate OOP structure.
+ *
+ *   - `br_xxx_ctrcbc_init(br_xxx_ctrcbc_keys *ctx, const void *key, size_t len)`
+ *
+ *     Perform key expansion: subkeys for combined CTR
+ *     encryption/decryption and CBC-MAC are computed and written in the
+ *     provided context structure. The key length MUST be adequate for
+ *     the implemented block cipher. This function also sets the
+ *     `vtable` field.
+ *
+ *   - `br_xxx_ctrcbc_encrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)`
+ *
+ *     Perform CTR encryption of some data, and CBC-MAC. Processing is
+ *     done "in place" (the output data replaces the input data). This
+ *     function applies CTR encryption on the data, using a full
+ *     block-size counter (i.e. for 128-bit blocks, the counter is
+ *     incremented as a 128-bit value). The 'ctr' array contains the
+ *     initial value for the counter (used in the first block) and it is
+ *     updated with the new value after data processing. The 'cbcmac'
+ *     value shall point to a block-sized value which is used as IV for
+ *     CBC-MAC, computed over the encrypted data (output of CTR
+ *     encryption); the resulting CBC-MAC is written over 'cbcmac' on
+ *     output.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_decrypt(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *cbcmac, void *data, size_t len)`
+ *
+ *     Perform CTR decryption of some data, and CBC-MAC. Processing is
+ *     done "in place" (the output data replaces the input data). This
+ *     function applies CTR decryption on the data, using a full
+ *     block-size counter (i.e. for 128-bit blocks, the counter is
+ *     incremented as a 128-bit value). The 'ctr' array contains the
+ *     initial value for the counter (used in the first block) and it is
+ *     updated with the new value after data processing. The 'cbcmac'
+ *     value shall point to a block-sized value which is used as IV for
+ *     CBC-MAC, computed over the encrypted data (input of CTR
+ *     decryption); the resulting CBC-MAC is written over 'cbcmac' on
+ *     output.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_ctr(const br_xxx_ctrcbc_keys *ctx, void *ctr, void *data, size_t len)`
+ *
+ *     Perform CTR encryption or decryption of the provided data. The
+ *     data is processed "in place" (the output data replaces the input
+ *     data). A full block-sized counter is applied (i.e. for 128-bit
+ *     blocks, the counter is incremented as a 128-bit value). The 'ctr'
+ *     array contains the initial value for the counter (used in the
+ *     first block), and it is updated with the new value after data
+ *     processing.
+ *
+ *     The data length MUST be a multiple of the block size.
+ *
+ *   - `br_xxx_ctrcbc_mac(const br_xxx_ctrcbc_keys *ctx, void *cbcmac, const void *data, size_t len)`
+ *
+ *     Compute CBC-MAC over the provided data. The IV for CBC-MAC is
+ *     provided as 'cbcmac'; the output is written over the same array.
+ *     The data itself is untouched. The data length MUST be a multiple
+ *     of the block size.
+ *
  *
  * It shall be noted that the key expansion functions return `void`. If
  * the provided key length is not allowed, then there will be no error
@@ -176,6 +243,41 @@ extern "C" {
  *
  *     Pointer to the encryption/decryption function.
  *
+ * For combined CTR/CBC-MAC encryption, the `vtable` has a slightly
+ * different structure:
+ *
+ *   - `context_size`
+ *
+ *     The size (in bytes) of the context structure for subkeys.
+ *
+ *   - `block_size`
+ *
+ *     The cipher block size (in bytes).
+ *
+ *   - `log_block_size`
+ *
+ *     The base-2 logarithm of cipher block size (e.g. 4 for blocks
+ *     of 16 bytes).
+ *
+ *   - `init`
+ *
+ *     Pointer to the key expansion function.
+ *
+ *   - `encrypt`
+ *
+ *     Pointer to the CTR encryption + CBC-MAC function.
+ *
+ *   - `decrypt`
+ *
+ *     Pointer to the CTR decryption + CBC-MAC function.
+ *
+ *   - `ctr`
+ *
+ *     Pointer to the CTR encryption/decryption function.
+ *
+ *   - `mac`
+ *
+ *     Pointer to the CBC-MAC function.
  *
  * For block cipher "`xxx`", static, constant instances of these
  * structures are defined, under the names:
@@ -183,6 +285,7 @@ extern "C" {
  *   - `br_xxx_cbcenc_vtable`
  *   - `br_xxx_cbcdec_vtable`
  *   - `br_xxx_ctr_vtable`
+ *   - `br_xxx_ctrcbc_vtable`
  *
  *
  * ## Implemented Block Ciphers
@@ -460,6 +563,132 @@ struct br_block_ctr_class_ {
 		const void *iv, uint32_t cc, void *data, size_t len);
 };
 
+/**
+ * \brief Class type for combined CTR and CBC-MAC implementations.
+ *
+ * A `br_block_ctrcbc_class` instance points to the functions implementing
+ * a specific block cipher, when used in CTR mode for encrypting or
+ * decrypting data, along with CBC-MAC.
+ */
+typedef struct br_block_ctrcbc_class_ br_block_ctrcbc_class;
+struct br_block_ctrcbc_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate
+	 * for containing subkeys.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Size of individual blocks (in bytes).
+	 */
+	unsigned block_size;
+
+	/**
+	 * \brief Base-2 logarithm of the size of individual blocks,
+	 * expressed in bytes.
+	 */
+	unsigned log_block_size;
+
+	/**
+	 * \brief Initialisation function.
+	 *
+	 * This function sets the `vtable` field in the context structure.
+	 * The key length MUST be one of the key lengths supported by
+	 * the implementation.
+	 *
+	 * \param ctx       context structure to initialise.
+	 * \param key       secret key.
+	 * \param key_len   key length (in bytes).
+	 */
+	void (*init)(const br_block_ctrcbc_class **ctx,
+		const void *key, size_t key_len);
+
+	/**
+	 * \brief Run the CTR encryption + CBC-MAC.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as encryption proceeds.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC
+	 * is computed over the encrypted data (output of CTR
+	 * encryption). Its length shall be equal to the block size. The
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data to encrypt is updated "in place". Its length (`len`
+	 * bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR encryption (initial and final).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data to encrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*encrypt)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *cbcmac, void *data, size_t len);
+
+	/**
+	 * \brief Run the CTR decryption + CBC-MAC.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as decryption proceeds.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC. The MAC
+	 * is computed over the encrypted data (i.e. before CTR
+	 * decryption). Its length shall be equal to the block size. The
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data to decrypt is updated "in place". Its length (`len`
+	 * bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR encryption (initial and final).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data to decrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*decrypt)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *cbcmac, void *data, size_t len);
+
+	/**
+	 * \brief Run the CTR encryption/decryption only.
+	 *
+	 * The `ctr` parameter points to the counter; its length shall
+	 * be equal to the block size. It is updated by this function
+	 * as encryption or decryption proceeds.
+	 *
+	 * The data to process is updated "in place". Its length (`len`
+	 * bytes) MUST be a multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param ctr      counter for CTR encryption (initial and final).
+	 * \param data     data to encrypt or decrypt.
+	 * \param len      data length (in bytes).
+	 */
+	void (*ctr)(const br_block_ctrcbc_class *const *ctx,
+		void *ctr, void *data, size_t len);
+
+	/**
+	 * \brief Run the CBC-MAC only.
+	 *
+	 * The `cbcmac` parameter points to the IV for CBC-MAC; its
+	 * length shall be equal to the block size. The MAC is computed
+	 * over the provided data, as is (no CTR processing), and the
+	 * computed CBC-MAC value is written over the `cbcmac` array.
+	 *
+	 * The data is unmodified. Its length (`len` bytes) MUST be a
+	 * multiple of the block size.
+	 *
+	 * \param ctx      context structure (already initialised).
+	 * \param cbcmac   IV and output buffer for CBC-MAC.
+	 * \param data     data over which CBC-MAC is computed.
+	 * \param len      data length (in bytes).
+	 */
+	void (*mac)(const br_block_ctrcbc_class *const *ctx,
+		void *cbcmac, const void *data, size_t len);
+};
+
 /*
  * Traditional, table-based AES implementation. It is fast, but uses
  * internal tables (in particular a 1 kB table for encryption, another
@@ -517,6 +746,22 @@ typedef struct {
 #endif
 } br_aes_big_ctr_keys;
 
+/**
+ * \brief Context for AES subkeys (`aes_big` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_big_ctrcbc_keys;
+
 /**
  * \brief Class instance for AES CBC encryption (`aes_big` implementation).
  */
@@ -533,6 +778,12 @@ extern const br_block_cbcdec_class br_aes_big_cbcdec_vtable;
  */
 extern const br_block_ctr_class br_aes_big_ctr_vtable;
 
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_big` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable;
+
 /**
  * \brief Context initialisation (key schedule) for AES CBC encryption
  * (`aes_big` implementation).
@@ -566,6 +817,17 @@ void br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
 void br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
 	const void *key, size_t len);
 
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_big` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
 /**
  * \brief CBC encryption with AES (`aes_big` implementation).
  *
@@ -594,13 +856,59 @@ void br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx, void *iv,
  * \param ctx    context (already initialised).
  * \param iv     IV (constant, 12 bytes).
  * \param cc     initial block counter value.
- * \param data   data to decrypt (updated).
+ * \param data   data to encrypt or decrypt (updated).
  * \param len    data length (in bytes).
  * \return  new block counter value.
  */
 uint32_t br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
 	const void *iv, uint32_t cc, void *data, size_t len);
 
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_big` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
 /*
  * AES implementation optimized for size. It is slower than the
  * traditional table-based AES implementation, but requires much less
@@ -658,6 +966,22 @@ typedef struct {
 #endif
 } br_aes_small_ctr_keys;
 
+/**
+ * \brief Context for AES subkeys (`aes_small` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_small_ctrcbc_keys;
+
 /**
  * \brief Class instance for AES CBC encryption (`aes_small` implementation).
  */
@@ -674,6 +998,12 @@ extern const br_block_cbcdec_class br_aes_small_cbcdec_vtable;
  */
 extern const br_block_ctr_class br_aes_small_ctr_vtable;
 
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_small` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable;
+
 /**
  * \brief Context initialisation (key schedule) for AES CBC encryption
  * (`aes_small` implementation).
@@ -707,6 +1037,17 @@ void br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
 void br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
 	const void *key, size_t len);
 
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_small` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
 /**
  * \brief CBC encryption with AES (`aes_small` implementation).
  *
@@ -742,6 +1083,52 @@ void br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx, void *iv,
 uint32_t br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
 	const void *iv, uint32_t cc, void *data, size_t len);
 
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_small` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
 /*
  * Constant-time AES implementation. Its size is similar to that of
  * 'aes_big', and its performance is similar to that of 'aes_small' (faster
@@ -798,6 +1185,22 @@ typedef struct {
 #endif
 } br_aes_ct_ctr_keys;
 
+/**
+ * \brief Context for AES subkeys (`aes_ct` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint32_t skey[60];
+	unsigned num_rounds;
+#endif
+} br_aes_ct_ctrcbc_keys;
+
 /**
  * \brief Class instance for AES CBC encryption (`aes_ct` implementation).
  */
@@ -814,6 +1217,12 @@ extern const br_block_cbcdec_class br_aes_ct_cbcdec_vtable;
  */
 extern const br_block_ctr_class br_aes_ct_ctr_vtable;
 
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_ct` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable;
+
 /**
  * \brief Context initialisation (key schedule) for AES CBC encryption
  * (`aes_ct` implementation).
@@ -847,6 +1256,17 @@ void br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
 void br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
 	const void *key, size_t len);
 
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_ct` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
 /**
  * \brief CBC encryption with AES (`aes_ct` implementation).
  *
@@ -882,6 +1302,52 @@ void br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx, void *iv,
 uint32_t br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
 	const void *iv, uint32_t cc, void *data, size_t len);
 
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_ct` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
 /*
  * 64-bit constant-time AES implementation. It is similar to 'aes_ct'
  * but uses 64-bit registers, making it about twice faster than 'aes_ct'
@@ -940,6 +1406,22 @@ typedef struct {
 #endif
 } br_aes_ct64_ctr_keys;
 
+/**
+ * \brief Context for AES subkeys (`aes_ct64` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	uint64_t skey[30];
+	unsigned num_rounds;
+#endif
+} br_aes_ct64_ctrcbc_keys;
+
 /**
  * \brief Class instance for AES CBC encryption (`aes_ct64` implementation).
  */
@@ -956,6 +1438,12 @@ extern const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable;
  */
 extern const br_block_ctr_class br_aes_ct64_ctr_vtable;
 
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_ct64` implementation).
+ */
+extern const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable;
+
 /**
  * \brief Context initialisation (key schedule) for AES CBC encryption
  * (`aes_ct64` implementation).
@@ -989,6 +1477,17 @@ void br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
 void br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
 	const void *key, size_t len);
 
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_ct64` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
 /**
  * \brief CBC encryption with AES (`aes_ct64` implementation).
  *
@@ -1024,6 +1523,52 @@ void br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx, void *iv,
 uint32_t br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
 	const void *iv, uint32_t cc, void *data, size_t len);
 
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_ct64` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
 /*
  * AES implementation using AES-NI opcodes (x86 platform).
  */
@@ -1083,6 +1628,24 @@ typedef struct {
 #endif
 } br_aes_x86ni_ctr_keys;
 
+/**
+ * \brief Context for AES subkeys (`aes_x86ni` implementation, CTR encryption
+ * and decryption + CBC-MAC).
+ *
+ * First field is a pointer to the vtable; it is set by the initialisation
+ * function. Other fields are not supposed to be accessed by user code.
+ */
+typedef struct {
+	/** \brief Pointer to vtable for this context. */
+	const br_block_ctrcbc_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	union {
+		unsigned char skni[16 * 15];
+	} skey;
+	unsigned num_rounds;
+#endif
+} br_aes_x86ni_ctrcbc_keys;
+
 /**
  * \brief Class instance for AES CBC encryption (`aes_x86ni` implementation).
  *
@@ -1111,6 +1674,16 @@ extern const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable;
  */
 extern const br_block_ctr_class br_aes_x86ni_ctr_vtable;
 
+/**
+ * \brief Class instance for AES CTR encryption/decryption + CBC-MAC
+ * (`aes_x86ni` implementation).
+ *
+ * Since this implementation might be omitted from the library, or the
+ * AES opcode unavailable on the current CPU, a pointer to this class
+ * instance should be obtained through `br_aes_x86ni_ctrcbc_get_vtable()`.
+ */
+extern const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable;
+
 /**
  * \brief Context initialisation (key schedule) for AES CBC encryption
  * (`aes_x86ni` implementation).
@@ -1144,6 +1717,17 @@ void br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
 void br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
 	const void *key, size_t len);
 
+/**
+ * \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
+ * (`aes_x86ni` implementation).
+ *
+ * \param ctx   context to initialise.
+ * \param key   secret key.
+ * \param len   secret key length (in bytes).
+ */
+void br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len);
+
 /**
  * \brief CBC encryption with AES (`aes_x86ni` implementation).
  *
@@ -1179,6 +1763,52 @@ void br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx, void *iv,
 uint32_t br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
 	const void *iv, uint32_t cc, void *data, size_t len);
 
+/**
+ * \brief CTR encryption + CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to encrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR decryption + CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len);
+
+/**
+ * \brief CTR encryption/decryption with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param ctr      counter for CTR (16 bytes, updated).
+ * \param data     data to encrypt or decrypt (updated).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len);
+
+/**
+ * \brief CBC-MAC with AES (`aes_x86ni` implementation).
+ *
+ * \param ctx      context (already initialised).
+ * \param cbcmac   IV for CBC-MAC (updated).
+ * \param data     data to MAC (unmodified).
+ * \param len      data length (in bytes, MUST be a multiple of 16).
+ */
+void br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len);
+
 /**
  * \brief Obtain the `aes_x86ni` AES-CBC (encryption) implementation, if
  * available.
@@ -1188,7 +1818,7 @@ uint32_t br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
  * opcodes are available on the currently running CPU. If either of
  * these conditions is not met, then this function returns `NULL`.
  *
- * \return  the `aes_x868ni` AES-CBC (encryption) implementation, or `NULL`.
+ * \return  the `aes_x86ni` AES-CBC (encryption) implementation, or `NULL`.
  */
 const br_block_cbcenc_class *br_aes_x86ni_cbcenc_get_vtable(void);
 
@@ -1201,7 +1831,7 @@ const br_block_cbcenc_class *br_aes_x86ni_cbcenc_get_vtable(void);
  * opcodes are available on the currently running CPU. If either of
  * these conditions is not met, then this function returns `NULL`.
  *
- * \return  the `aes_x868ni` AES-CBC (decryption) implementation, or `NULL`.
+ * \return  the `aes_x86ni` AES-CBC (decryption) implementation, or `NULL`.
  */
 const br_block_cbcdec_class *br_aes_x86ni_cbcdec_get_vtable(void);
 
@@ -1213,10 +1843,23 @@ const br_block_cbcdec_class *br_aes_x86ni_cbcdec_get_vtable(void);
  * opcodes are available on the currently running CPU. If either of
  * these conditions is not met, then this function returns `NULL`.
  *
- * \return  the `aes_x868ni` AES-CTR implementation, or `NULL`.
+ * \return  the `aes_x86ni` AES-CTR implementation, or `NULL`.
  */
 const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void);
 
+/**
+ * \brief Obtain the `aes_x86ni` AES-CTR + CBC-MAC implementation, if
+ * available.
+ *
+ * This function returns a pointer to `br_aes_x86ni_ctrcbc_vtable`, if
+ * that implementation was compiled in the library _and_ the x86 AES
+ * opcodes are available on the currently running CPU. If either of
+ * these conditions is not met, then this function returns `NULL`.
+ *
+ * \return  the `aes_x86ni` AES-CTR + CBC-MAC implementation, or `NULL`.
+ */
+const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void);
+
 /*
  * AES implementation using POWER8 opcodes.
  */
@@ -1452,6 +2095,22 @@ typedef union {
 	br_aes_pwr8_ctr_keys c_pwr8;
 } br_aes_gen_ctr_keys;
 
+/**
+ * \brief Aggregate structure large enough to be used as context for
+ * subkeys (CTR encryption/decryption + CBC-MAC) for all AES implementations.
+ */
+typedef union {
+	const br_block_ctrcbc_class *vtable;
+	br_aes_big_ctrcbc_keys c_big;
+	br_aes_small_ctrcbc_keys c_small;
+	br_aes_ct_ctrcbc_keys c_ct;
+	br_aes_ct64_ctrcbc_keys c_ct64;
+	/* FIXME
+	br_aes_x86ni_ctrcbc_keys c_x86ni;
+	br_aes_pwr8_ctrcbc_keys c_pwr8;
+	*/
+} br_aes_gen_ctrcbc_keys;
+
 /*
  * Traditional, table-based implementation for DES/3DES. Since tables are
  * used, cache-timing attacks are conceptually possible.
diff --git a/mk/Rules.mk b/mk/Rules.mk
index 21a58ce..2a281ff 100644
--- a/mk/Rules.mk
+++ b/mk/Rules.mk
@@ -1,6 +1,6 @@
 # Automatically generated rules. Use 'mkrules.sh' to modify/regenerate.
 
-OBJ = $(OBJDIR)$Psettings$O $(OBJDIR)$Pgcm$O $(OBJDIR)$Pccopy$O $(OBJDIR)$Pdec16be$O $(OBJDIR)$Pdec16le$O $(OBJDIR)$Pdec32be$O $(OBJDIR)$Pdec32le$O $(OBJDIR)$Pdec64be$O $(OBJDIR)$Pdec64le$O $(OBJDIR)$Penc16be$O $(OBJDIR)$Penc16le$O $(OBJDIR)$Penc32be$O $(OBJDIR)$Penc32le$O $(OBJDIR)$Penc64be$O $(OBJDIR)$Penc64le$O $(OBJDIR)$Ppemdec$O $(OBJDIR)$Pec_all_m15$O $(OBJDIR)$Pec_all_m31$O $(OBJDIR)$Pec_c25519_i15$O $(OBJDIR)$Pec_c25519_i31$O $(OBJDIR)$Pec_c25519_m15$O $(OBJDIR)$Pec_c25519_m31$O $(OBJDIR)$Pec_curve25519$O $(OBJDIR)$Pec_default$O $(OBJDIR)$Pec_p256_m15$O $(OBJDIR)$Pec_p256_m31$O $(OBJDIR)$Pec_prime_i15$O $(OBJDIR)$Pec_prime_i31$O $(OBJDIR)$Pec_secp256r1$O $(OBJDIR)$Pec_secp384r1$O $(OBJDIR)$Pec_secp521r1$O $(OBJDIR)$Pecdsa_atr$O $(OBJDIR)$Pecdsa_default_sign_asn1$O $(OBJDIR)$Pecdsa_default_sign_raw$O $(OBJDIR)$Pecdsa_default_vrfy_asn1$O $(OBJDIR)$Pecdsa_default_vrfy_raw$O $(OBJDIR)$Pecdsa_i15_bits$O $(OBJDIR)$Pecdsa_i15_sign_asn1$O $(OBJDIR)$Pecdsa_i15_sign_raw$O $(OBJDIR)$Pecdsa_i15_vrfy_asn1$O $(OBJDIR)$Pecdsa_i15_vrfy_raw$O $(OBJDIR)$Pecdsa_i31_bits$O $(OBJDIR)$Pecdsa_i31_sign_asn1$O $(OBJDIR)$Pecdsa_i31_sign_raw$O $(OBJDIR)$Pecdsa_i31_vrfy_asn1$O $(OBJDIR)$Pecdsa_i31_vrfy_raw$O $(OBJDIR)$Pecdsa_rta$O $(OBJDIR)$Pdig_oid$O $(OBJDIR)$Pdig_size$O $(OBJDIR)$Pghash_ctmul$O $(OBJDIR)$Pghash_ctmul32$O $(OBJDIR)$Pghash_ctmul64$O $(OBJDIR)$Pghash_pclmul$O $(OBJDIR)$Pghash_pwr8$O $(OBJDIR)$Pmd5$O $(OBJDIR)$Pmd5sha1$O $(OBJDIR)$Pmultihash$O $(OBJDIR)$Psha1$O $(OBJDIR)$Psha2big$O $(OBJDIR)$Psha2small$O $(OBJDIR)$Pi15_add$O $(OBJDIR)$Pi15_bitlen$O $(OBJDIR)$Pi15_decmod$O $(OBJDIR)$Pi15_decode$O $(OBJDIR)$Pi15_decred$O $(OBJDIR)$Pi15_encode$O $(OBJDIR)$Pi15_fmont$O $(OBJDIR)$Pi15_iszero$O $(OBJDIR)$Pi15_modpow$O $(OBJDIR)$Pi15_modpow2$O $(OBJDIR)$Pi15_montmul$O $(OBJDIR)$Pi15_mulacc$O $(OBJDIR)$Pi15_muladd$O $(OBJDIR)$Pi15_ninv15$O $(OBJDIR)$Pi15_reduce$O $(OBJDIR)$Pi15_rshift$O $(OBJDIR)$Pi15_sub$O $(OBJDIR)$Pi15_tmont$O $(OBJDIR)$Pi31_add$O $(OBJDIR)$Pi31_bitlen$O 
$(OBJDIR)$Pi31_decmod$O $(OBJDIR)$Pi31_decode$O $(OBJDIR)$Pi31_decred$O $(OBJDIR)$Pi31_encode$O $(OBJDIR)$Pi31_fmont$O $(OBJDIR)$Pi31_iszero$O $(OBJDIR)$Pi31_modpow$O $(OBJDIR)$Pi31_modpow2$O $(OBJDIR)$Pi31_montmul$O $(OBJDIR)$Pi31_mulacc$O $(OBJDIR)$Pi31_muladd$O $(OBJDIR)$Pi31_ninv31$O $(OBJDIR)$Pi31_reduce$O $(OBJDIR)$Pi31_rshift$O $(OBJDIR)$Pi31_sub$O $(OBJDIR)$Pi31_tmont$O $(OBJDIR)$Pi32_add$O $(OBJDIR)$Pi32_bitlen$O $(OBJDIR)$Pi32_decmod$O $(OBJDIR)$Pi32_decode$O $(OBJDIR)$Pi32_decred$O $(OBJDIR)$Pi32_div32$O $(OBJDIR)$Pi32_encode$O $(OBJDIR)$Pi32_fmont$O $(OBJDIR)$Pi32_iszero$O $(OBJDIR)$Pi32_modpow$O $(OBJDIR)$Pi32_montmul$O $(OBJDIR)$Pi32_mulacc$O $(OBJDIR)$Pi32_muladd$O $(OBJDIR)$Pi32_ninv32$O $(OBJDIR)$Pi32_reduce$O $(OBJDIR)$Pi32_sub$O $(OBJDIR)$Pi32_tmont$O $(OBJDIR)$Pi62_modpow2$O $(OBJDIR)$Phmac$O $(OBJDIR)$Phmac_ct$O $(OBJDIR)$Phmac_drbg$O $(OBJDIR)$Psysrng$O $(OBJDIR)$Prsa_default_pkcs1_sign$O $(OBJDIR)$Prsa_default_pkcs1_vrfy$O $(OBJDIR)$Prsa_default_priv$O $(OBJDIR)$Prsa_default_pub$O $(OBJDIR)$Prsa_i15_pkcs1_sign$O $(OBJDIR)$Prsa_i15_pkcs1_vrfy$O $(OBJDIR)$Prsa_i15_priv$O $(OBJDIR)$Prsa_i15_pub$O $(OBJDIR)$Prsa_i31_pkcs1_sign$O $(OBJDIR)$Prsa_i31_pkcs1_vrfy$O $(OBJDIR)$Prsa_i31_priv$O $(OBJDIR)$Prsa_i31_pub$O $(OBJDIR)$Prsa_i32_pkcs1_sign$O $(OBJDIR)$Prsa_i32_pkcs1_vrfy$O $(OBJDIR)$Prsa_i32_priv$O $(OBJDIR)$Prsa_i32_pub$O $(OBJDIR)$Prsa_i62_pkcs1_sign$O $(OBJDIR)$Prsa_i62_pkcs1_vrfy$O $(OBJDIR)$Prsa_i62_priv$O $(OBJDIR)$Prsa_i62_pub$O $(OBJDIR)$Prsa_pkcs1_sig_pad$O $(OBJDIR)$Prsa_pkcs1_sig_unpad$O $(OBJDIR)$Prsa_ssl_decrypt$O $(OBJDIR)$Pprf$O $(OBJDIR)$Pprf_md5sha1$O $(OBJDIR)$Pprf_sha256$O $(OBJDIR)$Pprf_sha384$O $(OBJDIR)$Pssl_ccert_single_ec$O $(OBJDIR)$Pssl_ccert_single_rsa$O $(OBJDIR)$Pssl_client$O $(OBJDIR)$Pssl_client_default_rsapub$O $(OBJDIR)$Pssl_client_full$O $(OBJDIR)$Pssl_engine$O $(OBJDIR)$Pssl_engine_default_aescbc$O $(OBJDIR)$Pssl_engine_default_aesgcm$O $(OBJDIR)$Pssl_engine_default_chapol$O 
$(OBJDIR)$Pssl_engine_default_descbc$O $(OBJDIR)$Pssl_engine_default_ec$O $(OBJDIR)$Pssl_engine_default_ecdsa$O $(OBJDIR)$Pssl_engine_default_rsavrfy$O $(OBJDIR)$Pssl_hashes$O $(OBJDIR)$Pssl_hs_client$O $(OBJDIR)$Pssl_hs_server$O $(OBJDIR)$Pssl_io$O $(OBJDIR)$Pssl_keyexport$O $(OBJDIR)$Pssl_lru$O $(OBJDIR)$Pssl_rec_cbc$O $(OBJDIR)$Pssl_rec_chapol$O $(OBJDIR)$Pssl_rec_gcm$O $(OBJDIR)$Pssl_scert_single_ec$O $(OBJDIR)$Pssl_scert_single_rsa$O $(OBJDIR)$Pssl_server$O $(OBJDIR)$Pssl_server_full_ec$O $(OBJDIR)$Pssl_server_full_rsa$O $(OBJDIR)$Pssl_server_mine2c$O $(OBJDIR)$Pssl_server_mine2g$O $(OBJDIR)$Pssl_server_minf2c$O $(OBJDIR)$Pssl_server_minf2g$O $(OBJDIR)$Pssl_server_minr2g$O $(OBJDIR)$Pssl_server_minu2g$O $(OBJDIR)$Pssl_server_minv2g$O $(OBJDIR)$Paes_big_cbcdec$O $(OBJDIR)$Paes_big_cbcenc$O $(OBJDIR)$Paes_big_ctr$O $(OBJDIR)$Paes_big_dec$O $(OBJDIR)$Paes_big_enc$O $(OBJDIR)$Paes_common$O $(OBJDIR)$Paes_ct$O $(OBJDIR)$Paes_ct64$O $(OBJDIR)$Paes_ct64_cbcdec$O $(OBJDIR)$Paes_ct64_cbcenc$O $(OBJDIR)$Paes_ct64_ctr$O $(OBJDIR)$Paes_ct64_dec$O $(OBJDIR)$Paes_ct64_enc$O $(OBJDIR)$Paes_ct_cbcdec$O $(OBJDIR)$Paes_ct_cbcenc$O $(OBJDIR)$Paes_ct_ctr$O $(OBJDIR)$Paes_ct_dec$O $(OBJDIR)$Paes_ct_enc$O $(OBJDIR)$Paes_pwr8$O $(OBJDIR)$Paes_pwr8_cbcdec$O $(OBJDIR)$Paes_pwr8_cbcenc$O $(OBJDIR)$Paes_pwr8_ctr$O $(OBJDIR)$Paes_small_cbcdec$O $(OBJDIR)$Paes_small_cbcenc$O $(OBJDIR)$Paes_small_ctr$O $(OBJDIR)$Paes_small_dec$O $(OBJDIR)$Paes_small_enc$O $(OBJDIR)$Paes_x86ni$O $(OBJDIR)$Paes_x86ni_cbcdec$O $(OBJDIR)$Paes_x86ni_cbcenc$O $(OBJDIR)$Paes_x86ni_ctr$O $(OBJDIR)$Pchacha20_ct$O $(OBJDIR)$Pchacha20_sse2$O $(OBJDIR)$Pdes_ct$O $(OBJDIR)$Pdes_ct_cbcdec$O $(OBJDIR)$Pdes_ct_cbcenc$O $(OBJDIR)$Pdes_support$O $(OBJDIR)$Pdes_tab$O $(OBJDIR)$Pdes_tab_cbcdec$O $(OBJDIR)$Pdes_tab_cbcenc$O $(OBJDIR)$Ppoly1305_ctmul$O $(OBJDIR)$Ppoly1305_ctmul32$O $(OBJDIR)$Ppoly1305_ctmulq$O $(OBJDIR)$Ppoly1305_i15$O $(OBJDIR)$Pskey_decoder$O $(OBJDIR)$Px509_decoder$O $(OBJDIR)$Px509_knownkey$O 
$(OBJDIR)$Px509_minimal$O $(OBJDIR)$Px509_minimal_full$O
+OBJ = $(OBJDIR)$Psettings$O $(OBJDIR)$Pccm$O $(OBJDIR)$Peax$O $(OBJDIR)$Pgcm$O $(OBJDIR)$Pccopy$O $(OBJDIR)$Pdec16be$O $(OBJDIR)$Pdec16le$O $(OBJDIR)$Pdec32be$O $(OBJDIR)$Pdec32le$O $(OBJDIR)$Pdec64be$O $(OBJDIR)$Pdec64le$O $(OBJDIR)$Penc16be$O $(OBJDIR)$Penc16le$O $(OBJDIR)$Penc32be$O $(OBJDIR)$Penc32le$O $(OBJDIR)$Penc64be$O $(OBJDIR)$Penc64le$O $(OBJDIR)$Ppemdec$O $(OBJDIR)$Pec_all_m15$O $(OBJDIR)$Pec_all_m31$O $(OBJDIR)$Pec_c25519_i15$O $(OBJDIR)$Pec_c25519_i31$O $(OBJDIR)$Pec_c25519_m15$O $(OBJDIR)$Pec_c25519_m31$O $(OBJDIR)$Pec_curve25519$O $(OBJDIR)$Pec_default$O $(OBJDIR)$Pec_p256_m15$O $(OBJDIR)$Pec_p256_m31$O $(OBJDIR)$Pec_prime_i15$O $(OBJDIR)$Pec_prime_i31$O $(OBJDIR)$Pec_secp256r1$O $(OBJDIR)$Pec_secp384r1$O $(OBJDIR)$Pec_secp521r1$O $(OBJDIR)$Pecdsa_atr$O $(OBJDIR)$Pecdsa_default_sign_asn1$O $(OBJDIR)$Pecdsa_default_sign_raw$O $(OBJDIR)$Pecdsa_default_vrfy_asn1$O $(OBJDIR)$Pecdsa_default_vrfy_raw$O $(OBJDIR)$Pecdsa_i15_bits$O $(OBJDIR)$Pecdsa_i15_sign_asn1$O $(OBJDIR)$Pecdsa_i15_sign_raw$O $(OBJDIR)$Pecdsa_i15_vrfy_asn1$O $(OBJDIR)$Pecdsa_i15_vrfy_raw$O $(OBJDIR)$Pecdsa_i31_bits$O $(OBJDIR)$Pecdsa_i31_sign_asn1$O $(OBJDIR)$Pecdsa_i31_sign_raw$O $(OBJDIR)$Pecdsa_i31_vrfy_asn1$O $(OBJDIR)$Pecdsa_i31_vrfy_raw$O $(OBJDIR)$Pecdsa_rta$O $(OBJDIR)$Pdig_oid$O $(OBJDIR)$Pdig_size$O $(OBJDIR)$Pghash_ctmul$O $(OBJDIR)$Pghash_ctmul32$O $(OBJDIR)$Pghash_ctmul64$O $(OBJDIR)$Pghash_pclmul$O $(OBJDIR)$Pghash_pwr8$O $(OBJDIR)$Pmd5$O $(OBJDIR)$Pmd5sha1$O $(OBJDIR)$Pmultihash$O $(OBJDIR)$Psha1$O $(OBJDIR)$Psha2big$O $(OBJDIR)$Psha2small$O $(OBJDIR)$Pi15_add$O $(OBJDIR)$Pi15_bitlen$O $(OBJDIR)$Pi15_decmod$O $(OBJDIR)$Pi15_decode$O $(OBJDIR)$Pi15_decred$O $(OBJDIR)$Pi15_encode$O $(OBJDIR)$Pi15_fmont$O $(OBJDIR)$Pi15_iszero$O $(OBJDIR)$Pi15_modpow$O $(OBJDIR)$Pi15_modpow2$O $(OBJDIR)$Pi15_montmul$O $(OBJDIR)$Pi15_mulacc$O $(OBJDIR)$Pi15_muladd$O $(OBJDIR)$Pi15_ninv15$O $(OBJDIR)$Pi15_reduce$O $(OBJDIR)$Pi15_rshift$O $(OBJDIR)$Pi15_sub$O $(OBJDIR)$Pi15_tmont$O 
$(OBJDIR)$Pi31_add$O $(OBJDIR)$Pi31_bitlen$O $(OBJDIR)$Pi31_decmod$O $(OBJDIR)$Pi31_decode$O $(OBJDIR)$Pi31_decred$O $(OBJDIR)$Pi31_encode$O $(OBJDIR)$Pi31_fmont$O $(OBJDIR)$Pi31_iszero$O $(OBJDIR)$Pi31_modpow$O $(OBJDIR)$Pi31_modpow2$O $(OBJDIR)$Pi31_montmul$O $(OBJDIR)$Pi31_mulacc$O $(OBJDIR)$Pi31_muladd$O $(OBJDIR)$Pi31_ninv31$O $(OBJDIR)$Pi31_reduce$O $(OBJDIR)$Pi31_rshift$O $(OBJDIR)$Pi31_sub$O $(OBJDIR)$Pi31_tmont$O $(OBJDIR)$Pi32_add$O $(OBJDIR)$Pi32_bitlen$O $(OBJDIR)$Pi32_decmod$O $(OBJDIR)$Pi32_decode$O $(OBJDIR)$Pi32_decred$O $(OBJDIR)$Pi32_div32$O $(OBJDIR)$Pi32_encode$O $(OBJDIR)$Pi32_fmont$O $(OBJDIR)$Pi32_iszero$O $(OBJDIR)$Pi32_modpow$O $(OBJDIR)$Pi32_montmul$O $(OBJDIR)$Pi32_mulacc$O $(OBJDIR)$Pi32_muladd$O $(OBJDIR)$Pi32_ninv32$O $(OBJDIR)$Pi32_reduce$O $(OBJDIR)$Pi32_sub$O $(OBJDIR)$Pi32_tmont$O $(OBJDIR)$Pi62_modpow2$O $(OBJDIR)$Phmac$O $(OBJDIR)$Phmac_ct$O $(OBJDIR)$Phmac_drbg$O $(OBJDIR)$Psysrng$O $(OBJDIR)$Prsa_default_pkcs1_sign$O $(OBJDIR)$Prsa_default_pkcs1_vrfy$O $(OBJDIR)$Prsa_default_priv$O $(OBJDIR)$Prsa_default_pub$O $(OBJDIR)$Prsa_i15_pkcs1_sign$O $(OBJDIR)$Prsa_i15_pkcs1_vrfy$O $(OBJDIR)$Prsa_i15_priv$O $(OBJDIR)$Prsa_i15_pub$O $(OBJDIR)$Prsa_i31_pkcs1_sign$O $(OBJDIR)$Prsa_i31_pkcs1_vrfy$O $(OBJDIR)$Prsa_i31_priv$O $(OBJDIR)$Prsa_i31_pub$O $(OBJDIR)$Prsa_i32_pkcs1_sign$O $(OBJDIR)$Prsa_i32_pkcs1_vrfy$O $(OBJDIR)$Prsa_i32_priv$O $(OBJDIR)$Prsa_i32_pub$O $(OBJDIR)$Prsa_i62_pkcs1_sign$O $(OBJDIR)$Prsa_i62_pkcs1_vrfy$O $(OBJDIR)$Prsa_i62_priv$O $(OBJDIR)$Prsa_i62_pub$O $(OBJDIR)$Prsa_pkcs1_sig_pad$O $(OBJDIR)$Prsa_pkcs1_sig_unpad$O $(OBJDIR)$Prsa_ssl_decrypt$O $(OBJDIR)$Pprf$O $(OBJDIR)$Pprf_md5sha1$O $(OBJDIR)$Pprf_sha256$O $(OBJDIR)$Pprf_sha384$O $(OBJDIR)$Pssl_ccert_single_ec$O $(OBJDIR)$Pssl_ccert_single_rsa$O $(OBJDIR)$Pssl_client$O $(OBJDIR)$Pssl_client_default_rsapub$O $(OBJDIR)$Pssl_client_full$O $(OBJDIR)$Pssl_engine$O $(OBJDIR)$Pssl_engine_default_aescbc$O $(OBJDIR)$Pssl_engine_default_aesgcm$O 
$(OBJDIR)$Pssl_engine_default_chapol$O $(OBJDIR)$Pssl_engine_default_descbc$O $(OBJDIR)$Pssl_engine_default_ec$O $(OBJDIR)$Pssl_engine_default_ecdsa$O $(OBJDIR)$Pssl_engine_default_rsavrfy$O $(OBJDIR)$Pssl_hashes$O $(OBJDIR)$Pssl_hs_client$O $(OBJDIR)$Pssl_hs_server$O $(OBJDIR)$Pssl_io$O $(OBJDIR)$Pssl_keyexport$O $(OBJDIR)$Pssl_lru$O $(OBJDIR)$Pssl_rec_cbc$O $(OBJDIR)$Pssl_rec_chapol$O $(OBJDIR)$Pssl_rec_gcm$O $(OBJDIR)$Pssl_scert_single_ec$O $(OBJDIR)$Pssl_scert_single_rsa$O $(OBJDIR)$Pssl_server$O $(OBJDIR)$Pssl_server_full_ec$O $(OBJDIR)$Pssl_server_full_rsa$O $(OBJDIR)$Pssl_server_mine2c$O $(OBJDIR)$Pssl_server_mine2g$O $(OBJDIR)$Pssl_server_minf2c$O $(OBJDIR)$Pssl_server_minf2g$O $(OBJDIR)$Pssl_server_minr2g$O $(OBJDIR)$Pssl_server_minu2g$O $(OBJDIR)$Pssl_server_minv2g$O $(OBJDIR)$Paes_big_cbcdec$O $(OBJDIR)$Paes_big_cbcenc$O $(OBJDIR)$Paes_big_ctr$O $(OBJDIR)$Paes_big_ctrcbc$O $(OBJDIR)$Paes_big_dec$O $(OBJDIR)$Paes_big_enc$O $(OBJDIR)$Paes_common$O $(OBJDIR)$Paes_ct$O $(OBJDIR)$Paes_ct64$O $(OBJDIR)$Paes_ct64_cbcdec$O $(OBJDIR)$Paes_ct64_cbcenc$O $(OBJDIR)$Paes_ct64_ctr$O $(OBJDIR)$Paes_ct64_ctrcbc$O $(OBJDIR)$Paes_ct64_dec$O $(OBJDIR)$Paes_ct64_enc$O $(OBJDIR)$Paes_ct_cbcdec$O $(OBJDIR)$Paes_ct_cbcenc$O $(OBJDIR)$Paes_ct_ctr$O $(OBJDIR)$Paes_ct_ctrcbc$O $(OBJDIR)$Paes_ct_dec$O $(OBJDIR)$Paes_ct_enc$O $(OBJDIR)$Paes_pwr8$O $(OBJDIR)$Paes_pwr8_cbcdec$O $(OBJDIR)$Paes_pwr8_cbcenc$O $(OBJDIR)$Paes_pwr8_ctr$O $(OBJDIR)$Paes_small_cbcdec$O $(OBJDIR)$Paes_small_cbcenc$O $(OBJDIR)$Paes_small_ctr$O $(OBJDIR)$Paes_small_ctrcbc$O $(OBJDIR)$Paes_small_dec$O $(OBJDIR)$Paes_small_enc$O $(OBJDIR)$Paes_x86ni$O $(OBJDIR)$Paes_x86ni_cbcdec$O $(OBJDIR)$Paes_x86ni_cbcenc$O $(OBJDIR)$Paes_x86ni_ctr$O $(OBJDIR)$Paes_x86ni_ctrcbc$O $(OBJDIR)$Pchacha20_ct$O $(OBJDIR)$Pchacha20_sse2$O $(OBJDIR)$Pdes_ct$O $(OBJDIR)$Pdes_ct_cbcdec$O $(OBJDIR)$Pdes_ct_cbcenc$O $(OBJDIR)$Pdes_support$O $(OBJDIR)$Pdes_tab$O $(OBJDIR)$Pdes_tab_cbcdec$O $(OBJDIR)$Pdes_tab_cbcenc$O 
$(OBJDIR)$Ppoly1305_ctmul$O $(OBJDIR)$Ppoly1305_ctmul32$O $(OBJDIR)$Ppoly1305_ctmulq$O $(OBJDIR)$Ppoly1305_i15$O $(OBJDIR)$Pskey_decoder$O $(OBJDIR)$Px509_decoder$O $(OBJDIR)$Px509_knownkey$O $(OBJDIR)$Px509_minimal$O $(OBJDIR)$Px509_minimal_full$O
 OBJBRSSL = $(OBJDIR)$Pbrssl$O $(OBJDIR)$Pcerts$O $(OBJDIR)$Pchain$O $(OBJDIR)$Pclient$O $(OBJDIR)$Perrors$O $(OBJDIR)$Pfiles$O $(OBJDIR)$Pimpl$O $(OBJDIR)$Pkeys$O $(OBJDIR)$Pnames$O $(OBJDIR)$Pserver$O $(OBJDIR)$Pskey$O $(OBJDIR)$Psslio$O $(OBJDIR)$Pta$O $(OBJDIR)$Ptwrch$O $(OBJDIR)$Pvector$O $(OBJDIR)$Pverify$O $(OBJDIR)$Pxmem$O
 OBJTESTCRYPTO = $(OBJDIR)$Ptest_crypto$O
 OBJTESTSPEED = $(OBJDIR)$Ptest_speed$O
@@ -64,6 +64,12 @@ $(TESTX509): $(BEARSSLLIB) $(OBJTESTX509)
 $(OBJDIR)$Psettings$O: src$Psettings.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Psettings$O src$Psettings.c
 
+$(OBJDIR)$Pccm$O: src$Paead$Pccm.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pccm$O src$Paead$Pccm.c
+
+$(OBJDIR)$Peax$O: src$Paead$Peax.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Peax$O src$Paead$Peax.c
+
 $(OBJDIR)$Pgcm$O: src$Paead$Pgcm.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pgcm$O src$Paead$Pgcm.c
 
@@ -607,6 +613,9 @@ $(OBJDIR)$Paes_big_cbcenc$O: src$Psymcipher$Paes_big_cbcenc.c $(HEADERSPRIV)
 $(OBJDIR)$Paes_big_ctr$O: src$Psymcipher$Paes_big_ctr.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_big_ctr$O src$Psymcipher$Paes_big_ctr.c
 
+$(OBJDIR)$Paes_big_ctrcbc$O: src$Psymcipher$Paes_big_ctrcbc.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_big_ctrcbc$O src$Psymcipher$Paes_big_ctrcbc.c
+
 $(OBJDIR)$Paes_big_dec$O: src$Psymcipher$Paes_big_dec.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_big_dec$O src$Psymcipher$Paes_big_dec.c
 
@@ -631,6 +640,9 @@ $(OBJDIR)$Paes_ct64_cbcenc$O: src$Psymcipher$Paes_ct64_cbcenc.c $(HEADERSPRIV)
 $(OBJDIR)$Paes_ct64_ctr$O: src$Psymcipher$Paes_ct64_ctr.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct64_ctr$O src$Psymcipher$Paes_ct64_ctr.c
 
+$(OBJDIR)$Paes_ct64_ctrcbc$O: src$Psymcipher$Paes_ct64_ctrcbc.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct64_ctrcbc$O src$Psymcipher$Paes_ct64_ctrcbc.c
+
 $(OBJDIR)$Paes_ct64_dec$O: src$Psymcipher$Paes_ct64_dec.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct64_dec$O src$Psymcipher$Paes_ct64_dec.c
 
@@ -646,6 +658,9 @@ $(OBJDIR)$Paes_ct_cbcenc$O: src$Psymcipher$Paes_ct_cbcenc.c $(HEADERSPRIV)
 $(OBJDIR)$Paes_ct_ctr$O: src$Psymcipher$Paes_ct_ctr.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct_ctr$O src$Psymcipher$Paes_ct_ctr.c
 
+$(OBJDIR)$Paes_ct_ctrcbc$O: src$Psymcipher$Paes_ct_ctrcbc.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct_ctrcbc$O src$Psymcipher$Paes_ct_ctrcbc.c
+
 $(OBJDIR)$Paes_ct_dec$O: src$Psymcipher$Paes_ct_dec.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_ct_dec$O src$Psymcipher$Paes_ct_dec.c
 
@@ -673,6 +688,9 @@ $(OBJDIR)$Paes_small_cbcenc$O: src$Psymcipher$Paes_small_cbcenc.c $(HEADERSPRIV)
 $(OBJDIR)$Paes_small_ctr$O: src$Psymcipher$Paes_small_ctr.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_small_ctr$O src$Psymcipher$Paes_small_ctr.c
 
+$(OBJDIR)$Paes_small_ctrcbc$O: src$Psymcipher$Paes_small_ctrcbc.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_small_ctrcbc$O src$Psymcipher$Paes_small_ctrcbc.c
+
 $(OBJDIR)$Paes_small_dec$O: src$Psymcipher$Paes_small_dec.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_small_dec$O src$Psymcipher$Paes_small_dec.c
 
@@ -691,6 +709,9 @@ $(OBJDIR)$Paes_x86ni_cbcenc$O: src$Psymcipher$Paes_x86ni_cbcenc.c $(HEADERSPRIV)
 $(OBJDIR)$Paes_x86ni_ctr$O: src$Psymcipher$Paes_x86ni_ctr.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_x86ni_ctr$O src$Psymcipher$Paes_x86ni_ctr.c
 
+$(OBJDIR)$Paes_x86ni_ctrcbc$O: src$Psymcipher$Paes_x86ni_ctrcbc.c $(HEADERSPRIV)
+	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Paes_x86ni_ctrcbc$O src$Psymcipher$Paes_x86ni_ctrcbc.c
+
 $(OBJDIR)$Pchacha20_ct$O: src$Psymcipher$Pchacha20_ct.c $(HEADERSPRIV)
 	$(CC) $(CFLAGS) $(INCFLAGS) $(CCOUT)$(OBJDIR)$Pchacha20_ct$O src$Psymcipher$Pchacha20_ct.c
 
diff --git a/mk/mkrules.sh b/mk/mkrules.sh
index 44f787e..2fd1f0f 100755
--- a/mk/mkrules.sh
+++ b/mk/mkrules.sh
@@ -50,6 +50,8 @@ set -e
 # Source files. Please keep in alphabetical order.
 coresrc=" \
 	src/settings.c \
+	src/aead/ccm.c \
+	src/aead/eax.c \
 	src/aead/gcm.c \
 	src/codec/ccopy.c \
 	src/codec/dec16be.c \
@@ -231,6 +233,7 @@ coresrc=" \
 	src/symcipher/aes_big_cbcdec.c \
 	src/symcipher/aes_big_cbcenc.c \
 	src/symcipher/aes_big_ctr.c \
+	src/symcipher/aes_big_ctrcbc.c \
 	src/symcipher/aes_big_dec.c \
 	src/symcipher/aes_big_enc.c \
 	src/symcipher/aes_common.c \
@@ -239,11 +242,13 @@ coresrc=" \
 	src/symcipher/aes_ct64_cbcdec.c \
 	src/symcipher/aes_ct64_cbcenc.c \
 	src/symcipher/aes_ct64_ctr.c \
+	src/symcipher/aes_ct64_ctrcbc.c \
 	src/symcipher/aes_ct64_dec.c \
 	src/symcipher/aes_ct64_enc.c \
 	src/symcipher/aes_ct_cbcdec.c \
 	src/symcipher/aes_ct_cbcenc.c \
 	src/symcipher/aes_ct_ctr.c \
+	src/symcipher/aes_ct_ctrcbc.c \
 	src/symcipher/aes_ct_dec.c \
 	src/symcipher/aes_ct_enc.c \
 	src/symcipher/aes_pwr8.c \
@@ -253,12 +258,14 @@ coresrc=" \
 	src/symcipher/aes_small_cbcdec.c \
 	src/symcipher/aes_small_cbcenc.c \
 	src/symcipher/aes_small_ctr.c \
+	src/symcipher/aes_small_ctrcbc.c \
 	src/symcipher/aes_small_dec.c \
 	src/symcipher/aes_small_enc.c \
 	src/symcipher/aes_x86ni.c \
 	src/symcipher/aes_x86ni_cbcdec.c \
 	src/symcipher/aes_x86ni_cbcenc.c \
 	src/symcipher/aes_x86ni_ctr.c \
+	src/symcipher/aes_x86ni_ctrcbc.c \
 	src/symcipher/chacha20_ct.c \
 	src/symcipher/chacha20_sse2.c \
 	src/symcipher/des_ct.c \
diff --git a/src/aead/ccm.c b/src/aead/ccm.c
new file mode 100644
index 0000000..68cc913
--- /dev/null
+++ b/src/aead/ccm.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary.
+ *
+ *  - 'ptr' contains a value from 0 to 15, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current CBC-MAC computation.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contain the plaintext bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available. When 'ptr' is 0, the
+ *    contents of buf[] are to be ignored.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ */
+
+/* see bearssl_aead.h: binds the CCM context to a CTR + CBC-MAC engine */
+void
+br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	ctx->bctx = bctx;
+}
+
+/* see bearssl_aead.h: returns 1 on success, 0 if a length is out of range */
+int
+br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len)
+{
+	unsigned char tmp[16];
+	unsigned u, q;
+
+	if (nonce_len < 7 || nonce_len > 13) {
+		return 0;
+	}
+	if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) {
+		return 0;
+	}
+	q = 15 - (unsigned)nonce_len;
+	ctx->tag_len = tag_len;
+
+	/*
+	 * Block B0, to start CBC-MAC: flags, nonce, data_len over q bytes.
+	 */
+	tmp[0] = (aad_len > 0 ? 0x40 : 0x00)
+		| (((unsigned)tag_len - 2) << 2)
+		| (q - 1);
+	memcpy(tmp + 1, nonce, nonce_len);
+	for (u = 0; u < q; u ++) {
+		tmp[15 - u] = (unsigned char)data_len;
+		data_len >>= 8;
+	}
+	if (data_len != 0) {
+		/*
+		 * If the data length was not entirely consumed in the
+		 * loop above, then it exceeds the maximum limit of
+		 * q bytes (when encoded).
+		 */
+		return 0;
+	}
+
+	/*
+	 * Start CBC-MAC.
+	 */
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp);
+
+	/*
+	 * Assemble AAD length header (2, 6 or 10 bytes, left in buf[]).
+	 */
+	if ((aad_len >> 32) != 0) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFF;
+		br_enc64be(ctx->buf + 2, aad_len);
+		ctx->ptr = 10;
+	} else if (aad_len >= 0xFF00) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFE;
+		br_enc32be(ctx->buf + 2, (uint32_t)aad_len);
+		ctx->ptr = 6;
+	} else if (aad_len > 0) {
+		br_enc16be(ctx->buf, (unsigned)aad_len);
+		ctx->ptr = 2;
+	} else {
+		ctx->ptr = 0;
+	}
+
+	/*
+	 * Make initial counter value; tag mask is the first CTR stream block.
+	 */
+	ctx->ctr[0] = q - 1;
+	memcpy(ctx->ctr + 1, nonce, nonce_len);
+	memset(ctx->ctr + 1 + nonce_len, 0, q);
+	memset(ctx->tagmask, 0, sizeof ctx->tagmask);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+		ctx->tagmask, sizeof ctx->tagmask);
+
+	return 1;
+}
+
+/* see bearssl_aead.h: feeds AAD into CBC-MAC, buffering any partial block */
+void
+br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *dbuf;
+	size_t ptr;
+
+	dbuf = data;
+
+	/*
+	 * Complete partial block, if needed.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		size_t clen;
+
+		clen = (sizeof ctx->buf) - ptr;
+		if (clen > len) {
+			memcpy(ctx->buf + ptr, dbuf, len);
+			ctx->ptr = ptr + len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, dbuf, clen);
+		dbuf += clen;
+		len -= clen;
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Process complete blocks ('ptr' now counts the trailing bytes).
+	 */
+	ptr = len & 15;
+	len -= ptr;
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, dbuf, len);
+	dbuf += len;
+
+	/*
+	 * Copy last partial block in the context buffer.
+	 */
+	memcpy(ctx->buf, dbuf, ptr);
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_aead.h: ends the AAD phase (MACs a pending partial block) */
+void
+br_ccm_flip(br_ccm_context *ctx)
+{
+	size_t ptr;
+
+	/*
+	 * Complete AAD partial block with zeros, if necessary.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+		ctx->ptr = 0;
+	}
+
+	/*
+	 * Counter was already set by br_ccm_reset().
+	 */
+}
+
+/* see bearssl_aead.h: in-place CTR encrypt/decrypt, MAC over plaintext */
+void
+br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	dbuf = data;
+
+	/*
+	 * Complete a partial block, if any: ctx->buf[] contains
+	 * ctx->ptr plaintext bytes (already reported), and the other
+	 * bytes are CTR stream output.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		size_t clen;
+		size_t u;
+
+		clen = (sizeof ctx->buf) - ptr;
+		if (clen > len) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[ptr + u];
+				x = dbuf[u];
+				ctx->buf[ptr + u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned w;
+
+				w = ctx->buf[ptr + u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[ptr + u] = w;
+			}
+		}
+		dbuf += clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr < sizeof ctx->buf) {
+			ctx->ptr = ptr;
+			return;
+		}
+		(*ctx->bctx)->mac(ctx->bctx,
+			ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Process all complete blocks. Note that the ctrcbc API is for
+	 * encrypt-then-MAC (CBC-MAC is computed over the encrypted
+	 * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed
+	 * over the plaintext blocks). Therefore, we need to use the
+	 * _decryption_ function for encryption, and the encryption
+	 * function for decryption (this works because CTR encryption
+	 * and decryption are identical, so the choice really is about
+	 * computing the CBC-MAC before or after XORing with the CTR
+	 * stream).
+	 */
+	ptr = len & 15;
+	len -= ptr;
+	if (encrypt) {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * If there is some remaining data, then we need to compute an
+	 * extra block of CTR stream (buf[] ends up holding plaintext).
+	 */
+	if (ptr != 0) {
+		size_t u;
+
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+			ctx->buf, sizeof ctx->buf);
+		if (encrypt) {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[u];
+				x = dbuf[u];
+				ctx->buf[u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w;
+
+				w = ctx->buf[u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[u] = w;
+			}
+		}
+	}
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_aead.h: writes tag_len bytes to 'tag' and returns tag_len */
+size_t
+br_ccm_get_tag(br_ccm_context *ctx, void *tag)
+{
+	size_t ptr;
+	size_t u;
+
+	/*
+	 * If there is some buffered data, then we need to pad it with
+	 * zeros and finish up CBC-MAC.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * XOR the tag mask into the CBC-MAC output (modifies ctx->cbcmac).
+	 */
+	for (u = 0; u < ctx->tag_len; u ++) {
+		ctx->cbcmac[u] ^= ctx->tagmask[u];
+	}
+	memcpy(tag, ctx->cbcmac, ctx->tag_len);
+	return ctx->tag_len;
+}
+
+/* see bearssl_aead.h: compares over all tag bytes, with no early exit */
+uint32_t
+br_ccm_check_tag(br_ccm_context *ctx, const void *tag)
+{
+	unsigned char tmp[16];
+	size_t u, tag_len;
+	uint32_t z;
+
+	tag_len = br_ccm_get_tag(ctx, tmp);
+	z = 0;
+	for (u = 0; u < tag_len; u ++) {
+		z |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(z);
+}
diff --git a/src/aead/eax.c b/src/aead/eax.c
new file mode 100644
index 0000000..07b1cb9
--- /dev/null
+++ b/src/aead/eax.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary. Moreover, EAX has a special padding
+ * rule for CBC-MAC, which implies that we cannot compute the MAC over
+ * the last received full block until we know whether we are at the
+ * end of the data or not.
+ *
+ *  - 'ptr' contains a value from 1 to 16, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current OMAC computation. Beware that this can go to 16: a
+ *    complete block cannot be processed until it is known whether it
+ *    is the last block or not. However, it can never be 0, because
+ *    OMAC^t works on an input that is at least one-block long.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contain the encrypted bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ *
+ *  - The derived keys for padding are kept in L2 and L4 (double and
+ *    quadruple of Enc_K(0^n), in GF(2^128), respectively).
+ */
+
+/*
+ * Start an OMAC computation; the first block is the big-endian
+ * representation of the provided value ('val' must fit in one byte).
+ * We make it a delayed block because it may also be the last one.
+ */
+static void
+omac_start(br_eax_context *ctx, unsigned val)
+{
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	ctx->buf[15] = val;
+	ctx->ptr = 16;
+}
+
+/*
+ * Double a 16-byte big-endian value in finite field GF(2^128), with
+ * modulus X^128+X^7+X^2+X+1 (0x87 is folded in if the top bit is set).
+ */
+static void
+double_gf128(unsigned char *dst, const unsigned char *src)
+{
+	unsigned cc;
+	int i;
+
+	cc = 0x87 & -((unsigned)src[0] >> 7);
+	for (i = 15; i >= 0; i --) {
+		unsigned z;
+
+		z = (src[i] << 1) ^ cc;
+		cc = z >> 8;
+		dst[i] = (unsigned char)z;
+	}
+}
+
+/*
+ * Finalize OMAC on the last block in ctx->buf (ctx->ptr bytes): a full
+ * block is XORed with L2; a short one gets 0x80, zeros, then L4.
+ */
+static void
+do_pad(br_eax_context *ctx)
+{
+	unsigned char *pad;
+	size_t ptr, u;
+
+	ptr = ctx->ptr;
+	if (ptr == 16) {
+		pad = ctx->L2;
+	} else {
+		ctx->buf[ptr ++] = 0x80;
+		memset(ctx->buf + ptr, 0x00, 16 - ptr);
+		pad = ctx->L4;
+	}
+	for (u = 0; u < sizeof ctx->buf; u ++) {
+		ctx->buf[u] ^= pad[u];
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+}
+
+/*
+ * Apply CBC-MAC on the provided data, with buffering management. On
+ * input, ctx->buf must hold a full unprocessed block; on output it holds
+ * the last 1 to 16 bytes of 'data'. Nothing is done when len is 0.
+ */
+static void
+do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	if (len == 0) {
+		return;
+	}
+	ptr = len & (size_t)15;
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len);
+	memcpy(ctx->buf, (const unsigned char *)data + len, ptr);
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_aead.h: sets vtable and cipher, derives pads L2 and L4 */
+void
+br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	unsigned char tmp[16], iv[16];
+
+	ctx->vtable = &br_eax_vtable;
+	ctx->bctx = bctx;
+
+	/*
+	 * Encrypt a whole-zero block to compute L2 and L4 (CTR with a
+	 * zero counter over zero data yields that encrypted block).
+	 */
+	memset(tmp, 0, sizeof tmp);
+	memset(iv, 0, sizeof iv);
+	(*bctx)->ctr(bctx, iv, tmp, sizeof tmp);
+	double_gf128(ctx->L2, tmp);
+	double_gf128(ctx->L4, ctx->L2);
+}
+
+/* see bearssl_aead.h: hashes the nonce, then opens the AAD phase */
+void
+br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)
+{
+	/*
+	 * Process nonce with OMAC^0; the result is kept in ctx->nonce.
+	 */
+	omac_start(ctx, 0);
+	do_cbcmac_chunk(ctx, nonce, len);
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^1 for the AAD ("header" in the EAX specification).
+	 */
+	omac_start(ctx, 1);
+}
+
+/* see bearssl_aead.h: absorbs AAD; the latest block stays in buf[] */
+void
+br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	ptr = ctx->ptr;
+
+	/*
+	 * If there is a partial block, first complete it.
+	 */
+	if (ptr < 16) {
+		size_t clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->ptr = ptr + len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a full block in buf[], and this is not the last
+	 * block.
+	 */
+	do_cbcmac_chunk(ctx, data, len);
+}
+
+/* see bearssl_aead.h: closes the AAD phase and opens the data phase */
+void
+br_eax_flip(br_eax_context *ctx)
+{
+	/*
+	 * Complete the OMAC computation on the AAD (kept in ctx->head).
+	 */
+	do_pad(ctx);
+	memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^2 for the encrypted data.
+	 */
+	omac_start(ctx, 2);
+
+	/*
+	 * Initial counter value for CTR is the processed nonce.
+	 */
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
+}
+
+/* see bearssl_aead.h: in-place CTR encrypt/decrypt, OMAC over ciphertext */
+void
+br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	/*
+	 * Ensure that there is actual data to process.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	dbuf = data;
+	ptr = ctx->ptr;
+
+	if (ptr != 16) {
+		/*
+		 * We have a partially consumed block.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {
+				ctx->buf[ptr + u] ^= dbuf[u];
+			}
+			memcpy(dbuf, ctx->buf + ptr, clen);
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned dx, sx;
+
+				sx = ctx->buf[ptr + u];
+				dx = dbuf[u];
+				ctx->buf[ptr + u] = dx;
+				dbuf[u] = sx ^ dx;
+			}
+		}
+
+		if (len <= clen) {
+			ctx->ptr = ptr + clen;
+			return;
+		}
+		dbuf += clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a complete encrypted block in buf[] that must still
+	 * be processed with OMAC, and this is not the final block.
+	 */
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+
+	/*
+	 * Do CTR encryption or decryption and CBC-MAC for all full blocks
+	 * except the last.
+	 */
+	ptr = len & (size_t)15;
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (encrypt) {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * Compute next block of CTR stream, and use it to finish
+	 * encrypting or decrypting the data.
+	 */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);
+	if (encrypt) {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			ctx->buf[u] ^= dbuf[u];
+		}
+		memcpy(dbuf, ctx->buf, ptr);
+	} else {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			unsigned dx, sx;
+
+			sx = ctx->buf[u];
+			dx = dbuf[u];
+			ctx->buf[u] = dx;
+			dbuf[u] = sx ^ dx;
+		}
+	}
+	ctx->ptr = ptr;
+}
+
+/*
+ * Complete tag computation. The final 16-byte tag is left in ctx->cbcmac.
+ */
+static void
+do_final(br_eax_context *ctx)
+{
+	size_t u;
+
+	do_pad(ctx);
+
+	/*
+	 * Authentication tag is the XOR of the three OMAC outputs for
+	 * the nonce, AAD and encrypted data.
+	 */
+	for (u = 0; u < 16; u ++) {
+		ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u];
+	}
+}
+
+/* see bearssl_aead.h: finalizes, then copies the whole of ctx->cbcmac */
+void
+br_eax_get_tag(br_eax_context *ctx, void *tag)
+{
+	do_final(ctx);
+	memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac);
+}
+
+/* see bearssl_aead.h: copies the first 'len' tag bytes (len not checked) */
+void
+br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)
+{
+	do_final(ctx);
+	memcpy(tag, ctx->cbcmac, len);
+}
+
+/* see bearssl_aead.h: compares the first 'len' tag bytes, no early exit */
+uint32_t
+br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)
+{
+	unsigned char tmp[16];
+	size_t u;
+	int x;
+
+	br_eax_get_tag(ctx, tmp);
+	x = 0;
+	for (u = 0; u < len; u ++) {
+		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(x);
+}
+
+/* see bearssl_aead.h: br_eax_check_tag_trunc() over all 16 tag bytes */
+uint32_t
+br_eax_check_tag(br_eax_context *ctx, const void *tag)
+{
+	return br_eax_check_tag_trunc(ctx, tag, 16);
+}
+
+/* see bearssl_aead.h: br_aead_class method table for EAX */
+const br_aead_class br_eax_vtable = {
+	16,	/* presumably the tag size in bytes -- confirm in header */
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_eax_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_eax_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_eax_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_eax_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_eax_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_check_tag_trunc
+};
diff --git a/src/aead/gcm.c b/src/aead/gcm.c
index 9cf0f38..ede5f08 100644
--- a/src/aead/gcm.c
+++ b/src/aead/gcm.c
@@ -56,6 +56,7 @@ br_gcm_init(br_gcm_context *ctx, const br_block_ctr_class **bctx, br_ghash gh)
 {
 	unsigned char iv[12];
 
+	ctx->vtable = &br_gcm_vtable;
 	ctx->bctx = bctx;
 	ctx->gh = gh;
 
@@ -262,9 +263,19 @@ br_gcm_get_tag(br_gcm_context *ctx, void *tag)
 	(*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->j0_2, tag, 16);
 }
 
+/* see bearssl_aead.h: copies the first 'len' bytes of the computed tag */
+void
+br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len)
+{
+	unsigned char tmp[16];
+
+	br_gcm_get_tag(ctx, tmp);
+	memcpy(tag, tmp, len);
+}
+
 /* see bearssl_aead.h */
 uint32_t
-br_gcm_check_tag(br_gcm_context *ctx, const void *tag)
+br_gcm_check_tag_trunc(br_gcm_context *ctx, const void *tag, size_t len)
 {
 	unsigned char tmp[16];
 	size_t u;
@@ -272,12 +283,19 @@ br_gcm_check_tag(br_gcm_context *ctx, const void *tag)
 
 	br_gcm_get_tag(ctx, tmp);
 	x = 0;
-	for (u = 0; u < sizeof tmp; u ++) {
+	for (u = 0; u < len; u ++) {
 		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
 	}
 	return EQ0(x);
 }
 
+/* see bearssl_aead.h: br_gcm_check_tag_trunc() over all 16 tag bytes */
+uint32_t
+br_gcm_check_tag(br_gcm_context *ctx, const void *tag)
+{
+	return br_gcm_check_tag_trunc(ctx, tag, 16);
+}
+
 /* see bearssl_aead.h */
 const br_aead_class br_gcm_vtable = {
 	16,
@@ -292,5 +310,9 @@ const br_aead_class br_gcm_vtable = {
 	(void (*)(const br_aead_class **, void *))
 		&br_gcm_get_tag,
 	(uint32_t (*)(const br_aead_class **, const void *))
-		&br_gcm_check_tag
+		&br_gcm_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_gcm_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_check_tag_trunc
 };
diff --git a/src/symcipher/aes_big_ctrcbc.c b/src/symcipher/aes_big_ctrcbc.c
new file mode 100644
index 0000000..d45ca76
--- /dev/null
+++ b/src/symcipher/aes_big_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: sets the vtable and runs the AES key schedule */
+void
+br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_big_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *d;
+	const unsigned char *s;
+	/* XOR 'len' bytes of src[] into dst[], one byte at a time. */
+	d = dst;
+	s = src;
+	while (len -- > 0) {
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h: CTR over 16-byte blocks; 'ctr' is updated in place */
+void
+br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);
+	while (len > 0) {	/* consumes 16 bytes per pass */
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3);
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
+		xorbuf(buf, tmp, 16);
+		buf += 16;
+		len -= 16;
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31;	/* 1 iff cc0 wrapped to 0 */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31;	/* propagate without branching */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3);
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h: CBC-MAC update, 16 bytes of 'data' per iteration */
+void
+br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) {
+		xorbuf(cbcmac, buf, 16);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h: CTR first, then CBC-MAC over the CTR output */
+void
+br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+}
+
+/* see bearssl_block.h: CBC-MAC over the input first, then CTR */
+void
+br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+}
+
+/* see bearssl_block.h: br_block_ctrcbc_class method table ("big" AES) */
+const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
+	sizeof(br_aes_big_ctrcbc_keys),
+	16,	/* presumably the block size in bytes -- confirm in header */
+	4,	/* presumably log2 of the block size -- confirm in header */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_big_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_big_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_big_ctrcbc_mac
+};
diff --git a/src/symcipher/aes_ct64_ctrcbc.c b/src/symcipher/aes_ct64_ctrcbc.c
new file mode 100644
index 0000000..21bb8ef
--- /dev/null
+++ b/src/symcipher/aes_ct64_ctrcbc.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill in a ct64 CTR + CBC-MAC context from key/len; see bearssl_block.h */
+void
+br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;	/* bind the method table */
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);	/* keeps keysched's return value */
+}
+
+/* XOR the first len bytes of src into dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* CTR keystream XOR, up to four blocks per pass; see bearssl_block.h */
+void
+br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);	/* most significant word */
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);	/* least significant word */
+
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i, j;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		j = (len >= 64) ? 16 : (int)(len >> 2);	/* words of w[] to fill */
+		for (i = 0; i < j; i += 4) {
+			uint32_t carry;
+
+			w[i + 0] = br_swap32(iv0);
+			w[i + 1] = br_swap32(iv1);
+			w[i + 2] = br_swap32(iv2);
+			w[i + 3] = br_swap32(iv3);
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);	/* kept only if iv2 wrapped too */
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+		memset(w + i, 0, (16 - i) * sizeof(uint32_t));	/* zero the unused slots */
+
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+
+		br_range_enc32le(tmp, w, 16);
+		if (len <= 64) {	/* last pass: XOR only the remaining len bytes */
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+	}
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* CBC-MAC update, one 16-byte block per pass; see bearssl_block.h */
+void
+br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t q[8];
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);	/* MAC state as LE words */
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	memset(q, 0, sizeof q);	/* only q[0] and q[4] are loaded and read back here */
+	while (len > 0) {
+		uint32_t w[4];
+
+		w[0] = cm0 ^ br_dec32le(buf +  0);	/* MAC state XOR next block */
+		w[1] = cm1 ^ br_dec32le(buf +  4);
+		w[2] = cm2 ^ br_dec32le(buf +  8);
+		w[3] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+
+		cm0 = w[0];	/* encrypted value becomes the new MAC state */
+		cm1 = w[1];
+		cm2 = w[2];
+		cm3 = w[3];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Fused CTR encryption and CBC-MAC over the output; see bearssl_block.h */
+void
+br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	int first_iter;
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0;
+		w[5] = cm1;
+		w[6] = cm2;
+		w[7] = cm3;
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		w[0] ^= br_dec32le(buf +  0);
+		w[1] ^= br_dec32le(buf +  4);
+		w[2] ^= br_dec32le(buf +  8);
+		w[3] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, w[0]);
+		br_enc32le(buf +  4, w[1]);
+		br_enc32le(buf +  8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {	/* w[4..7] is not used on the first pass */
+			first_iter = 0;
+			cm0 ^= w[0];
+			cm1 ^= w[1];
+			cm2 ^= w[2];
+			cm3 ^= w[3];
+		} else {	/* encrypted previous MAC input XOR this output block */
+			cm0 = w[0] ^ w[4];
+			cm1 = w[1] ^ w[5];
+			cm2 = w[2] ^ w[6];
+			cm3 = w[3] ^ w[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			w[0] = cm0;
+			w[1] = cm1;
+			w[2] = cm2;
+			w[3] = cm3;
+			br_aes_ct64_interleave_in(&q[0], &q[4], w);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_bitslice_encrypt(
+				ctx->num_rounds, sk_exp, q);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_interleave_out(w, q[0], q[4]);
+			cm0 = w[0];
+			cm1 = w[1];
+			cm2 = w[2];
+			cm3 = w[3];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Fused CBC-MAC over the input and CTR decryption; see bearssl_block.h */
+void
+br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0 ^ br_dec32le(buf +  0);	/* buf is read before it is XORed below */
+		w[5] = cm1 ^ br_dec32le(buf +  4);
+		w[6] = cm2 ^ br_dec32le(buf +  8);
+		w[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		br_enc32le(tmp +  0, w[0]);
+		br_enc32le(tmp +  4, w[1]);
+		br_enc32le(tmp +  8, w[2]);
+		br_enc32le(tmp + 12, w[3]);
+		xorbuf(buf, tmp, 16);	/* apply the keystream block to buf */
+		cm0 = w[4];	/* encrypted MAC input becomes the new MAC state */
+		cm1 = w[5];
+		cm2 = w[6];
+		cm3 = w[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Method table for the br_aes_ct64 CTR + CBC-MAC code; see bearssl_block.h */
+const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
+	sizeof(br_aes_ct64_ctrcbc_keys),	/* size of the matching context type */
+	16,	/* NOTE(review): presumably block size in bytes -- confirm in header */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct64_ctrcbc_init,	/* cast retypes ctx as a class pointer */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct64_ctrcbc_mac
+};
diff --git a/src/symcipher/aes_ct_ctrcbc.c b/src/symcipher/aes_ct_ctrcbc.c
new file mode 100644
index 0000000..8ae9fc7
--- /dev/null
+++ b/src/symcipher/aes_ct_ctrcbc.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill in a ct CTR + CBC-MAC context from key/len; see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct_ctrcbc_vtable;	/* bind the method table */
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);	/* keeps keysched's return value */
+}
+
+/* XOR the first len bytes of src into dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* CTR keystream XOR, up to two blocks per pass; see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);	/* most significant word */
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);	/* least significant word */
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[32];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);	/* even words: current counter value */
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* kept only if iv2 wrapped too */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+		q[1] = br_swap32(iv0);	/* odd words: the next counter value */
+		q[3] = br_swap32(iv1);
+		q[5] = br_swap32(iv2);
+		q[7] = br_swap32(iv3);
+		if (len > 16) {	/* second counter value is consumed: step past it */
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		br_enc32le(tmp, q[0]);	/* first 16 bytes of tmp: even words */
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]);	/* last 16 bytes of tmp: odd words */
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+
+		if (len <= 32) {	/* last pass: XOR only the remaining len bytes */
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* CBC-MAC update, one 16-byte block per pass; see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t q[8];
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	buf = data;
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);	/* MAC state as LE words */
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	q[1] = 0;	/* odd words are initialised once and never read back here */
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+
+	while (len > 0) {
+		q[0] = cm0 ^ br_dec32le(buf +  0);	/* MAC state XOR next block */
+		q[2] = cm1 ^ br_dec32le(buf +  4);
+		q[4] = cm2 ^ br_dec32le(buf +  8);
+		q[6] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		cm0 = q[0];	/* encrypted value becomes the new MAC state */
+		cm1 = q[2];
+		cm2 = q[4];
+		cm3 = q[6];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Fused CTR encryption and CBC-MAC over the output; see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+	int first_iter;
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		uint32_t q[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0;
+		q[3] = cm1;
+		q[5] = cm2;
+		q[7] = cm3;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		q[0] ^= br_dec32le(buf +  0);
+		q[2] ^= br_dec32le(buf +  4);
+		q[4] ^= br_dec32le(buf +  8);
+		q[6] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, q[0]);
+		br_enc32le(buf +  4, q[2]);
+		br_enc32le(buf +  8, q[4]);
+		br_enc32le(buf + 12, q[6]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {	/* the odd words are not used on the first pass */
+			first_iter = 0;
+			cm0 ^= q[0];
+			cm1 ^= q[2];
+			cm2 ^= q[4];
+			cm3 ^= q[6];
+		} else {	/* encrypted previous MAC input XOR this output block */
+			cm0 = q[0] ^ q[1];
+			cm1 = q[2] ^ q[3];
+			cm2 = q[4] ^ q[5];
+			cm3 = q[6] ^ q[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			q[0] = cm0;	/* odd words keep leftover values; not read after */
+			q[2] = cm1;
+			q[4] = cm2;
+			q[6] = cm3;
+			br_aes_ct_ortho(q);
+			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+			br_aes_ct_ortho(q);
+			cm0 = q[0];
+			cm1 = q[2];
+			cm2 = q[4];
+			cm3 = q[6];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Fused CBC-MAC over the input and CTR decryption; see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0 ^ br_dec32le(buf +  0);	/* buf is read before it is XORed below */
+		q[3] = cm1 ^ br_dec32le(buf +  4);
+		q[5] = cm2 ^ br_dec32le(buf +  8);
+		q[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		br_enc32le(tmp +  0, q[0]);
+		br_enc32le(tmp +  4, q[2]);
+		br_enc32le(tmp +  8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		xorbuf(buf, tmp, 16);	/* apply the keystream block to buf */
+		cm0 = q[1];	/* encrypted MAC input becomes the new MAC state */
+		cm1 = q[3];
+		cm2 = q[5];
+		cm3 = q[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* Method table for the br_aes_ct CTR + CBC-MAC code; see bearssl_block.h */
+const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
+	sizeof(br_aes_ct_ctrcbc_keys),	/* size of the matching context type */
+	16,	/* NOTE(review): presumably block size in bytes -- confirm in header */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct_ctrcbc_init,	/* cast retypes ctx as a class pointer */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct_ctrcbc_mac
+};
diff --git a/src/symcipher/aes_small_ctrcbc.c b/src/symcipher/aes_small_ctrcbc.c
new file mode 100644
index 0000000..2d6ba32
--- /dev/null
+++ b/src/symcipher/aes_small_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Fill in a small CTR + CBC-MAC context from key/len; see bearssl_block.h */
+void
+br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_ctrcbc_vtable;	/* bind the method table */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);	/* keeps keysched's return value */
+}
+
+/* XOR the first len bytes of src into dst (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* CTR keystream XOR, one 16-byte block per pass; see bearssl_block.h */
+void
+br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *out, *cbuf;
+	uint32_t hi, mh, ml, lo;
+
+	/* Counter words, most significant (hi) first in memory. */
+	cbuf = ctr;
+	hi = br_dec32be(cbuf +  0);
+	mh = br_dec32be(cbuf +  4);
+	ml = br_dec32be(cbuf +  8);
+	lo = br_dec32be(cbuf + 12);
+	for (out = data; len > 0; out += 16, len -= 16) {
+		unsigned char ks[16];
+		uint32_t cy;
+
+		br_enc32be(ks +  0, hi);
+		br_enc32be(ks +  4, mh);
+		br_enc32be(ks +  8, ml);
+		br_enc32be(ks + 12, lo);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		xorbuf(out, ks, 16);
+
+		/* Add 1; cy stays 1 only while each word wrapped to 0. */
+		lo ++;
+		cy = (~(lo | -lo)) >> 31;
+		ml += cy;
+		cy &= (~(ml | -ml)) >> 31;
+		mh += cy;
+		cy &= (~(mh | -mh)) >> 31;
+		hi += cy;
+	}
+	br_enc32be(cbuf +  0, hi);
+	br_enc32be(cbuf +  4, mh);
+	br_enc32be(cbuf +  8, ml);
+	br_enc32be(cbuf + 12, lo);
+}
+
+/* CBC-MAC update over consecutive 16-byte blocks; see bearssl_block.h */
+void
+br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *src;
+
+	/*
+	 * Fold each block into the running MAC, then encrypt the MAC.
+	 */
+	for (src = data; len > 0; src += 16, len -= 16) {
+		xorbuf(cbcmac, src, 16);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+	}
+}
+
+/* CTR pass over data, then CBC-MAC over the result; see bearssl_block.h */
+void
+br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);	/* runs first, XORs data in place */
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);	/* MAC sees what the CTR pass left */
+}
+
+/* CBC-MAC over data as received, then the CTR pass; see bearssl_block.h */
+void
+br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);	/* MAC first, data untouched so far */
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);	/* then XOR the keystream into data */
+}
+
+/* Method table for the br_aes_small CTR + CBC-MAC code; see bearssl_block.h */
+const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
+	sizeof(br_aes_small_ctrcbc_keys),	/* size of the matching context type */
+	16,	/* NOTE(review): presumably block size in bytes -- confirm in header */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_small_ctrcbc_init,	/* cast retypes ctx as a class pointer */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_small_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_small_ctrcbc_mac
+};
diff --git a/src/symcipher/aes_x86ni_ctrcbc.c b/src/symcipher/aes_x86ni_ctrcbc.c
new file mode 100644
index 0000000..f57fead
--- /dev/null
+++ b/src/symcipher/aes_x86ni_ctrcbc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h; returns NULL when br_aes_x86ni_supported() reports no support */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL;
+}
+
+/* see bearssl_block.h; binds the vtable and fills ctx->skey.skni from the key */
+void
+br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);	/* return value is kept as the round count */
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h; CTR-only pass, producing keystream for four blocks per loop iteration */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx0, ivx1, ivx2, ivx3;
+	__m128i erev, zero, one, four, notthree;
+	unsigned u;
+
+	buf = data;
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* load round keys 0..num_rounds */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants ('erev' is a full 16-byte reversal mask).
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+	four = _mm_set_epi64x(0, 4);
+	notthree = _mm_sub_epi64(zero, four);	/* low lane = ~3, high lane = 0 */
+
+	/*
+	 * Decode the counter in big-endian and pre-increment the other
+	 * three counters (low lane +1; high lane +1 when the low lane wraps to 0).
+	 */
+	ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
+	ivx1 = _mm_add_epi64(ivx0, one);
+	ivx1 = _mm_sub_epi64(ivx1,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
+	ivx2 = _mm_add_epi64(ivx1, one);
+	ivx2 = _mm_sub_epi64(ivx2,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
+	ivx3 = _mm_add_epi64(ivx2, one);
+	ivx3 = _mm_sub_epi64(ivx3,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		/*
+		 * Load counter values; we need to byteswap them because
+		 * the specification says that they use big-endian.
+		 */
+		x0 = _mm_shuffle_epi8(ivx0, erev);
+		x1 = _mm_shuffle_epi8(ivx1, erev);
+		x2 = _mm_shuffle_epi8(ivx2, erev);
+		x3 = _mm_shuffle_epi8(ivx3, erev);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {	/* sk[10] is the final round key */
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {	/* sk[12] is the final round key */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {	/* any other round count is processed as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {	/* full group: XOR the keystream into data in place */
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else {	/* final group, fewer than 64 bytes left */
+			unsigned char tmp[64];
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {	/* only the first len keystream bytes are used */
+				buf[u] ^= tmp[u];
+			}
+			switch (len) {	/* select the counter that follows the blocks consumed */
+			case 16:
+				ivx0 = ivx1;
+				break;
+			case 32:
+				ivx0 = ivx2;
+				break;
+			case 48:
+				ivx0 = ivx3;
+				break;
+			}	/* no default: any other len leaves ivx0 unchanged */
+			break;
+		}
+
+		/*
+		 * Add 4 to each counter value. For carry propagation
+		 * into the upper 64-bit words, we would need to compare
+		 * the results with 4, but SSE2+ has only _signed_
+		 * comparisons. Instead, we mask out the low two bits,
+		 * and check whether the remaining bits are zero.
+		 */
+		ivx0 = _mm_add_epi64(ivx0, four);
+		ivx1 = _mm_add_epi64(ivx1, four);
+		ivx2 = _mm_add_epi64(ivx2, four);
+		ivx3 = _mm_add_epi64(ivx3, four);
+		ivx0 = _mm_sub_epi64(ivx0,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx0, notthree), zero), 8));
+		ivx1 = _mm_sub_epi64(ivx1,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx1, notthree), zero), 8));
+		ivx2 = _mm_sub_epi64(ivx2,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx2, notthree), zero), 8));
+		ivx3 = _mm_sub_epi64(ivx3,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx3, notthree), zero), 8));
+	}
+
+	/*
+	 * Write back new counter value. The loop took care to put the
+	 * right counter value in ivx0.
+	 */
+	_mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
+}
+
+/* see bearssl_block.h; CBC-MAC only: state = AES(state XOR block) for each 16-byte block */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(cbcmac);	/* running CBC-MAC state */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* load round keys 0..num_rounds */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);	/* block XOR state */
+		x = _mm_xor_si128(x, sk[0]);
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) {
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {	/* any other round count is processed as 14 rounds */
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x;	/* encrypted block becomes the new state */
+		buf += 16;
+		len -= 16;	/* size_t: would wrap if len were not a multiple of 16 */
+	}
+	_mm_storeu_si128(cbcmac, ivx);	/* write the final state back to the caller */
+}
+
+/* see bearssl_block.h; CTR encryption with CBC-MAC over the produced ciphertext, two AES lanes per block */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+	int first_iter;	/* non-zero until the first block has been processed */
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* load round keys 0..num_rounds */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants ('erev' is a full 16-byte reversal mask).
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   input block of data (not yet encrypted)
+		 *   x0   counter (for CTR encryption)
+		 *   x1   pending input for CBC-MAC (lags one block behind)
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = cmx;
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {	/* any other round count is processed as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the ciphertext block */
+		if (first_iter) {	/* x1 is discarded: the incoming state is used as-is */
+			cmx = _mm_xor_si128(cmx, x0);
+			first_iter = 0;
+		} else {	/* x1 = AES(previous pending input) */
+			cmx = _mm_xor_si128(x1, x0);
+		}
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;	/* size_t: would wrap if len were not a multiple of 16 */
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			cmx = _mm_xor_si128(cmx, sk[0]);
+			cmx = _mm_aesenc_si128(cmx, sk[1]);
+			cmx = _mm_aesenc_si128(cmx, sk[2]);
+			cmx = _mm_aesenc_si128(cmx, sk[3]);
+			cmx = _mm_aesenc_si128(cmx, sk[4]);
+			cmx = _mm_aesenc_si128(cmx, sk[5]);
+			cmx = _mm_aesenc_si128(cmx, sk[6]);
+			cmx = _mm_aesenc_si128(cmx, sk[7]);
+			cmx = _mm_aesenc_si128(cmx, sk[8]);
+			cmx = _mm_aesenc_si128(cmx, sk[9]);
+			if (num_rounds == 10) {
+				cmx = _mm_aesenclast_si128(cmx, sk[10]);
+			} else if (num_rounds == 12) {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenclast_si128(cmx, sk[12]);
+			} else {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenc_si128(cmx, sk[12]);
+				cmx = _mm_aesenc_si128(cmx, sk[13]);
+				cmx = _mm_aesenclast_si128(cmx, sk[14]);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+/* see bearssl_block.h; CBC-MAC over the incoming data and CTR decryption, two AES lanes per block */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* load round keys 0..num_rounds */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants ('erev' is a full 16-byte reversal mask).
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   encrypted block of data
+		 *   x0   counter (for CTR encryption)
+		 *   x1   input for CBC-MAC (current state XOR dx)
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = _mm_xor_si128(cmx, dx);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {	/* any other round count is processed as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, dx);	/* keystream XOR input block */
+		cmx = x1;	/* encrypted MAC input becomes the new state */
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;	/* size_t: would wrap if len were not a multiple of 16 */
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h; method table exposing the x86ni functions through the generic class type */
+const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
+	sizeof(br_aes_x86ni_ctrcbc_keys),	/* size of the concrete context structure */
+	16,	/* presumably the block size in bytes -- confirm against br_block_ctrcbc_class */
+	4,	/* presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_mac	/* each entry is cast from its concrete-context signature */
+};
+
+#else
+
+/* see bearssl_block.h; variant compiled when BR_AES_X86NI is false: always reports "unavailable" */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/test/test_crypto.c b/test/test_crypto.c
index bea236b..e37034c 100644
--- a/test/test_crypto.c
+++ b/test/test_crypto.c
@@ -3411,6 +3411,224 @@ test_AES_pwr8(void)
 	}
 }
 
+/*
+ * Custom CTR + CBC-MAC AES implementation. Can also do CTR-only, and
+ * CBC-MAC-only. The 'aes_big' implementation (CTR) is used. This is
+ * meant for comparisons.
+ *
+ * If 'ctr' is NULL then no encryption/decryption is done; otherwise,
+ * CTR encryption/decryption is performed (full-block counter) and the
+ * 'ctr' array is updated with the new counter value.
+ *
+ * If 'cbcmac' is NULL then no CBC-MAC is done; otherwise, CBC-MAC is
+ * applied on the encrypted data, with 'cbcmac' as IV and destination
+ * buffer for the output. If 'ctr' is not NULL and 'encrypt' is non-zero,
+ * then CBC-MAC is computed over the result of CTR processing; otherwise,
+ * CBC-MAC is computed over the input data itself.
+ */
+static void
+do_aes_ctrcbc(const void *key, size_t key_len, int encrypt,
+	void *ctr, void *cbcmac, unsigned char *data, size_t len)
+{
+	br_aes_big_ctr_keys bc;
+	int i;	/* pass number: 0 or 1 */
+
+	br_aes_big_ctr_init(&bc, key, key_len);
+	for (i = 0; i < 2; i ++) {
+		/*
+		 * CBC-MAC is computed on the encrypted data, so in
+		 * first pass if decrypting, second pass if encrypting.
+		 */
+		if (cbcmac != NULL
+			&& ((encrypt && i == 1) || (!encrypt && i == 0)))
+		{
+			unsigned char zz[16];
+			size_t u;
+
+			memcpy(zz, cbcmac, sizeof zz);
+			for (u = 0; u < len; u += 16) {
+				unsigned char tmp[16];
+				size_t v;
+
+				for (v = 0; v < 16; v ++) {
+					tmp[v] = zz[v] ^ data[u + v];
+				}
+				memset(zz, 0, sizeof zz);	/* zero block: CTR output below is then pure keystream */
+				br_aes_big_ctr_run(&bc,	/* presumably yields AES(tmp) via IV = tmp[0..11], counter = tmp[12..15] -- confirm */
+					tmp, br_dec32be(tmp + 12), zz, 16);
+			}
+			memcpy(cbcmac, zz, sizeof zz);
+		}
+
+		/*
+		 * CTR encryption/decryption is done only in the first pass.
+		 * We process data block per block, because the CTR-only
+		 * class uses a 32-bit counter, while the CTR+CBC-MAC
+		 * class uses a 128-bit counter.
+		 */
+		if (ctr != NULL && i == 0) {
+			unsigned char zz[16];
+			size_t u;
+
+			memcpy(zz, ctr, sizeof zz);
+			for (u = 0; u < len; u += 16) {
+				int k;	/* byte index; named 'k' so it does not shadow the pass counter 'i' */
+
+				br_aes_big_ctr_run(&bc,
+					zz, br_dec32be(zz + 12), data + u, 16);
+				for (k = 15; k >= 0; k --) {	/* big-endian +1 over all 16 bytes */
+					zz[k] = (zz[k] + 1) & 0xFF;
+					if (zz[k] != 0) {	/* no wrap in this byte: carry stops */
+						break;
+					}
+				}
+			}
+			memcpy(ctr, zz, sizeof zz);
+		}
+	}
+}
+
+static void
+test_AES_CTRCBC_inner(const char *name, const br_block_ctrcbc_class *vt)
+{	/* Compares each method of 'vt' with do_aes_ctrcbc() on pseudorandom inputs. */
+	br_hmac_drbg_context rng;
+	size_t key_len;
+
+	printf("Test AES CTR/CBC-MAC %s: ", name);
+	fflush(stdout);
+
+	br_hmac_drbg_init(&rng, &br_sha256_vtable, name, strlen(name));	/* 'name' is the DRBG seed */
+	for (key_len = 16; key_len <= 32; key_len += 8) {	/* key lengths 16, 24, 32 bytes */
+		br_aes_gen_ctrcbc_keys bc;
+		unsigned char key[32];
+		size_t data_len;
+
+		br_hmac_drbg_generate(&rng, key, key_len);
+		vt->init(&bc.vtable, key, key_len);
+		for (data_len = 0; data_len <= 512; data_len += 16) {	/* 0 to 32 whole blocks */
+			unsigned char plain[512];
+			unsigned char data1[sizeof plain];
+			unsigned char data2[sizeof plain];
+			unsigned char ctr[16], cbcmac[16];
+			unsigned char ctr1[16], cbcmac1[16];
+			unsigned char ctr2[16], cbcmac2[16];
+			int i;
+
+			br_hmac_drbg_generate(&rng, plain, data_len);
+
+			for (i = 0; i <= 16; i ++) {
+				if (i == 0) {	/* random starting counter */
+					br_hmac_drbg_generate(&rng, ctr, 16);
+				} else {	/* i-1 zero bytes then 0xFF bytes: increments carry across bytes */
+					memset(ctr, 0, i - 1);
+					memset(ctr + i - 1, 0xFF, 17 - i);
+				}
+				br_hmac_drbg_generate(&rng, cbcmac, 16);
+
+				memcpy(data1, plain, data_len);	/* case 1: CTR only */
+				memcpy(ctr1, ctr, 16);
+				vt->ctr(&bc.vtable, ctr1, data1, data_len);
+				memcpy(data2, plain, data_len);
+				memcpy(ctr2, ctr, 16);
+				do_aes_ctrcbc(key, key_len, 1,
+					ctr2, NULL, data2, data_len);
+				check_equals("CTR-only data",
+					data1, data2, data_len);
+				check_equals("CTR-only counter",
+					ctr1, ctr2, 16);
+
+				memcpy(data1, plain, data_len);	/* case 2: CBC-MAC only */
+				memcpy(cbcmac1, cbcmac, 16);
+				vt->mac(&bc.vtable, cbcmac1, data1, data_len);
+				memcpy(data2, plain, data_len);
+				memcpy(cbcmac2, cbcmac, 16);
+				do_aes_ctrcbc(key, key_len, 1,
+					NULL, cbcmac2, data2, data_len);
+				check_equals("CBC-MAC-only",
+					cbcmac1, cbcmac2, 16);
+
+				memcpy(data1, plain, data_len);	/* case 3: combined encrypt */
+				memcpy(ctr1, ctr, 16);
+				memcpy(cbcmac1, cbcmac, 16);
+				vt->encrypt(&bc.vtable,
+					ctr1, cbcmac1, data1, data_len);
+				memcpy(data2, plain, data_len);
+				memcpy(ctr2, ctr, 16);
+				memcpy(cbcmac2, cbcmac, 16);
+				do_aes_ctrcbc(key, key_len, 1,
+					ctr2, cbcmac2, data2, data_len);
+				check_equals("encrypt: combined data",
+					data1, data2, data_len);
+				check_equals("encrypt: combined counter",
+					ctr1, ctr2, 16);
+				check_equals("encrypt: combined CBC-MAC",
+					cbcmac1, cbcmac2, 16);
+
+				memcpy(ctr1, ctr, 16);	/* case 4: combined decrypt; data1/data2 still hold the case 3 output */
+				memcpy(cbcmac1, cbcmac, 16);
+				vt->decrypt(&bc.vtable,
+					ctr1, cbcmac1, data1, data_len);
+				memcpy(ctr2, ctr, 16);
+				memcpy(cbcmac2, cbcmac, 16);
+				do_aes_ctrcbc(key, key_len, 0,
+					ctr2, cbcmac2, data2, data_len);
+				check_equals("decrypt: combined data",
+					data1, data2, data_len);
+				check_equals("decrypt: combined counter",
+					ctr1, ctr2, 16);
+				check_equals("decrypt: combined CBC-MAC",
+					cbcmac1, cbcmac2, 16);
+			}
+
+			printf(".");
+			fflush(stdout);
+		}
+
+		printf(" ");
+		fflush(stdout);
+	}
+
+	printf("done.\n");
+	fflush(stdout);
+}
+
+static void
+test_AES_CTRCBC_big(void)
+{	/* "big" is both the printed label and the DRBG seed inside the inner test */
+	test_AES_CTRCBC_inner("big", &br_aes_big_ctrcbc_vtable);
+}
+
+static void
+test_AES_CTRCBC_small(void)
+{	/* "small" is both the printed label and the DRBG seed inside the inner test */
+	test_AES_CTRCBC_inner("small", &br_aes_small_ctrcbc_vtable);
+}
+
+static void
+test_AES_CTRCBC_ct(void)
+{	/* "ct" is both the printed label and the DRBG seed inside the inner test */
+	test_AES_CTRCBC_inner("ct", &br_aes_ct_ctrcbc_vtable);
+}
+
+static void
+test_AES_CTRCBC_ct64(void)
+{	/* "ct64" is both the printed label and the DRBG seed inside the inner test */
+	test_AES_CTRCBC_inner("ct64", &br_aes_ct64_ctrcbc_vtable);
+}
+
+static void
+test_AES_CTRCBC_x86ni(void)
+{	/* Runs the inner test only when the x86ni vtable is available. */
+	const br_block_ctrcbc_class *vt;
+
+	vt = br_aes_x86ni_ctrcbc_get_vtable();	/* NULL when not compiled in or not supported */
+	if (vt != NULL) {
+		test_AES_CTRCBC_inner("x86ni", vt);
+	} else {
+		printf("Test AES CTR/CBC-MAC x86ni: UNAVAILABLE\n");
+	}
+}
+
 /*
  * DES known-answer tests. Order: plaintext, key, ciphertext.
  * (mostly from NIST SP 800-20).
@@ -5077,7 +5295,7 @@ test_GCM(void)
 		br_aes_ct_ctr_keys bc;
 		br_gcm_context gc;
 		unsigned char tmp[100], out[16];
-		size_t v;
+		size_t v, tag_len;
 
 		key_len = hextobin(key, KAT_GCM[u]);
 		plain_len = hextobin(plain, KAT_GCM[u + 1]);
@@ -5167,6 +5385,268 @@ test_GCM(void)
 			}
 		}
 
+		/*
+		 * Tag truncation.
+		 */
+		for (tag_len = 1; tag_len <= 16; tag_len ++) {
+			memset(out, 0x54, sizeof out);
+			memcpy(tmp, plain, plain_len);
+			br_gcm_reset(&gc, iv, iv_len);
+			br_gcm_aad_inject(&gc, aad, aad_len);
+			br_gcm_flip(&gc);
+			br_gcm_run(&gc, 1, tmp, plain_len);
+			br_gcm_get_tag_trunc(&gc, out, tag_len);
+			check_equals("KAT GCM 8", out, tag, tag_len);
+			for (v = tag_len; v < sizeof out; v ++) {
+				if (out[v] != 0x54) {
+					fprintf(stderr, "overflow on tag\n");
+					exit(EXIT_FAILURE);
+				}
+			}
+
+			memcpy(tmp, plain, plain_len);
+			br_gcm_reset(&gc, iv, iv_len);
+			br_gcm_aad_inject(&gc, aad, aad_len);
+			br_gcm_flip(&gc);
+			br_gcm_run(&gc, 1, tmp, plain_len);
+			if (!br_gcm_check_tag_trunc(&gc, out, tag_len)) {
+				fprintf(stderr, "Tag not verified (3)\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		printf(".");
+		fflush(stdout);
+	}
+
+	printf(" done.\n");
+	fflush(stdout);
+}
+
+/*
+ * From "The EAX Mode of Operation (A Two-Pass Authenticated Encryption
+ * Scheme Optimized for Simplicity and Efficiency)" (Bellare, Rogaway,
+ * Wagner), presented at FSE 2004. Full article is available at:
+ *   http://web.cs.ucdavis.edu/~rogaway/papers/eax.html
+ *
+ * EAX specification concatenates the authentication tag at the end of
+ * the ciphertext; in our API and the vectors below, the tag is separate.
+ *
+ * Order is: plaintext, key, nonce, header, ciphertext, tag (hex strings).
+ */
+static const char *const KAT_EAX[] = {
+	"",
+	"233952dee4d5ed5f9b9c6d6ff80ff478",
+	"62ec67f9c3a4a407fcb2a8c49031a8b3",
+	"6bfb914fd07eae6b",
+	"",
+	"e037830e8389f27b025a2d6527e79d01",
+
+	"f7fb",
+	"91945d3f4dcbee0bf45ef52255f095a4",
+	"becaf043b0a23d843194ba972c66debd",
+	"fa3bfd4806eb53fa",
+	"19dd",
+	"5c4c9331049d0bdab0277408f67967e5",
+
+	"1a47cb4933",
+	"01f74ad64077f2e704c0f60ada3dd523",
+	"70c3db4f0d26368400a10ed05d2bff5e",
+	"234a3463c1264ac6",
+	"d851d5bae0",
+	"3a59f238a23e39199dc9266626c40f80",
+
+	"481c9e39b1",
+	"d07cf6cbb7f313bdde66b727afd3c5e8",
+	"8408dfff3c1a2b1292dc199e46b7d617",
+	"33cce2eabff5a79d",
+	"632a9d131a",
+	"d4c168a4225d8e1ff755939974a7bede",
+
+	"40d0c07da5e4",
+	"35b6d0580005bbc12b0587124557d2c2",
+	"fdb6b06676eedc5c61d74276e1f8e816",
+	"aeb96eaebe2970e9",
+	"071dfe16c675",
+	"cb0677e536f73afe6a14b74ee49844dd",
+
+	"4de3b35c3fc039245bd1fb7d",
+	"bd8e6e11475e60b268784c38c62feb22",
+	"6eac5c93072d8e8513f750935e46da1b",
+	"d4482d1ca78dce0f",
+	"835bb4f15d743e350e728414",
+	"abb8644fd6ccb86947c5e10590210a4f",
+
+	"8b0a79306c9ce7ed99dae4f87f8dd61636",
+	"7c77d6e813bed5ac98baa417477a2e7d",
+	"1a8c98dcd73d38393b2bf1569deefc19",
+	"65d2017990d62528",
+	"02083e3979da014812f59f11d52630da30",
+	"137327d10649b0aa6e1c181db617d7f2",
+
+	"1bda122bce8a8dbaf1877d962b8592dd2d56",
+	"5fff20cafab119ca2fc73549e20f5b0d",
+	"dde59b97d722156d4d9aff2bc7559826",
+	"54b9f04e6a09189a",
+	"2ec47b2c4954a489afc7ba4897edcdae8cc3",
+	"3b60450599bd02c96382902aef7f832a",
+
+	"6cf36720872b8513f6eab1a8a44438d5ef11",
+	"a4a4782bcffd3ec5e7ef6d8c34a56123",
+	"b781fcf2f75fa5a8de97a9ca48e522ec",
+	"899a175897561d7e",
+	"0de18fd0fdd91e7af19f1d8ee8733938b1e8",
+	"e7f6d2231618102fdb7fe55ff1991700",
+
+	"ca40d7446e545ffaed3bd12a740a659ffbbb3ceab7",
+	"8395fcf1e95bebd697bd010bc766aac3",
+	"22e7add93cfc6393c57ec0b3c17d6b44",
+	"126735fcc320d25a",
+	"cb8920f87a6c75cff39627b56e3ed197c552d295a7",
+	"cfc46afc253b4652b1af3795b124ab6e",
+
+	NULL	/* terminator: test_EAX_inner() steps by 6 until it reads NULL */
+};
+
+static void
+test_EAX_inner(const char *name, const br_block_ctrcbc_class *vt)
+{	/* Runs every KAT_EAX vector through the EAX API using the AES implementation 'vt'. */
+	size_t u;
+
+	printf("Test EAX %s: ", name);
+	fflush(stdout);
+
+	for (u = 0; KAT_EAX[u]; u += 6) {	/* six strings per vector */
+		unsigned char plain[100];
+		unsigned char key[32];
+		unsigned char nonce[100];
+		unsigned char aad[100];
+		unsigned char cipher[100];
+		unsigned char tag[100];
+		size_t plain_len, key_len, nonce_len, aad_len;
+		br_aes_gen_ctrcbc_keys bc;
+		br_eax_context ec;
+		unsigned char tmp[100], out[16];
+		size_t v, tag_len;
+
+		plain_len = hextobin(plain, KAT_EAX[u]);
+		key_len = hextobin(key, KAT_EAX[u + 1]);
+		nonce_len = hextobin(nonce, KAT_EAX[u + 2]);
+		aad_len = hextobin(aad, KAT_EAX[u + 3]);
+		hextobin(cipher, KAT_EAX[u + 4]);	/* length not kept: plain_len is used for the ciphertext too */
+		hextobin(tag, KAT_EAX[u + 5]);
+
+		vt->init(&bc.vtable, key, key_len);
+		br_eax_init(&ec, &bc.vtable);
+
+		memset(tmp, 0x54, sizeof tmp);	/* sentinel fill, checked below for writes past plain_len */
+
+		/*
+		 * Basic operation: one-shot encryption, then decryption.
+		 */
+		memcpy(tmp, plain, plain_len);
+		br_eax_reset(&ec, nonce, nonce_len);
+		br_eax_aad_inject(&ec, aad, aad_len);
+		br_eax_flip(&ec);
+		br_eax_run(&ec, 1, tmp, plain_len);
+		br_eax_get_tag(&ec, out);
+		check_equals("KAT EAX 1", tmp, cipher, plain_len);
+		check_equals("KAT EAX 2", out, tag, 16);
+
+		br_eax_reset(&ec, nonce, nonce_len);
+		br_eax_aad_inject(&ec, aad, aad_len);
+		br_eax_flip(&ec);
+		br_eax_run(&ec, 0, tmp, plain_len);
+		check_equals("KAT EAX 3", tmp, plain, plain_len);
+		if (!br_eax_check_tag(&ec, tag)) {
+			fprintf(stderr, "Tag not verified (1)\n");
+			exit(EXIT_FAILURE);
+		}
+
+		for (v = plain_len; v < sizeof tmp; v ++) {
+			if (tmp[v] != 0x54) {
+				fprintf(stderr, "overflow on data\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		/*
+		 * Byte-by-byte injection (tmp holds the plaintext on entry).
+		 */
+		br_eax_reset(&ec, nonce, nonce_len);
+		for (v = 0; v < aad_len; v ++) {
+			br_eax_aad_inject(&ec, aad + v, 1);
+		}
+		br_eax_flip(&ec);
+		for (v = 0; v < plain_len; v ++) {
+			br_eax_run(&ec, 1, tmp + v, 1);
+		}
+		check_equals("KAT EAX 4", tmp, cipher, plain_len);
+		if (!br_eax_check_tag(&ec, tag)) {
+			fprintf(stderr, "Tag not verified (2)\n");
+			exit(EXIT_FAILURE);
+		}
+
+		br_eax_reset(&ec, nonce, nonce_len);
+		for (v = 0; v < aad_len; v ++) {
+			br_eax_aad_inject(&ec, aad + v, 1);
+		}
+		br_eax_flip(&ec);
+		for (v = 0; v < plain_len; v ++) {
+			br_eax_run(&ec, 0, tmp + v, 1);
+		}
+		br_eax_get_tag(&ec, out);
+		check_equals("KAT EAX 5", tmp, plain, plain_len);
+		check_equals("KAT EAX 6", out, tag, 16);
+
+		/*
+		 * Check that alterations are detected.
+		 */
+		for (v = 0; v < aad_len; v ++) {
+			memcpy(tmp, cipher, plain_len);
+			br_eax_reset(&ec, nonce, nonce_len);
+			aad[v] ^= 0x04;	/* flip one bit of AAD byte v for this injection only */
+			br_eax_aad_inject(&ec, aad, aad_len);
+			aad[v] ^= 0x04;	/* restore the original AAD */
+			br_eax_flip(&ec);
+			br_eax_run(&ec, 0, tmp, plain_len);
+			check_equals("KAT EAX 7", tmp, plain, plain_len);
+			if (br_eax_check_tag(&ec, tag)) {
+				fprintf(stderr, "Tag should have changed\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		/*
+		 * Tag truncation: every length from 1 to 16 bytes.
+		 */
+		for (tag_len = 1; tag_len <= 16; tag_len ++) {
+			memset(out, 0x54, sizeof out);	/* sentinel fill, to detect writes past tag_len */
+			memcpy(tmp, plain, plain_len);
+			br_eax_reset(&ec, nonce, nonce_len);
+			br_eax_aad_inject(&ec, aad, aad_len);
+			br_eax_flip(&ec);
+			br_eax_run(&ec, 1, tmp, plain_len);
+			br_eax_get_tag_trunc(&ec, out, tag_len);
+			check_equals("KAT EAX 8", out, tag, tag_len);
+			for (v = tag_len; v < sizeof out; v ++) {
+				if (out[v] != 0x54) {
+					fprintf(stderr, "overflow on tag\n");
+					exit(EXIT_FAILURE);
+				}
+			}
+
+			memcpy(tmp, plain, plain_len);	/* encrypt again, then verify the truncated tag just produced */
+			br_eax_reset(&ec, nonce, nonce_len);
+			br_eax_aad_inject(&ec, aad, aad_len);
+			br_eax_flip(&ec);
+			br_eax_run(&ec, 1, tmp, plain_len);
+			if (!br_eax_check_tag_trunc(&ec, out, tag_len)) {
+				fprintf(stderr, "Tag not verified (3)\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+
 		printf(".");
 		fflush(stdout);
 	}
@@ -5175,6 +5655,236 @@ test_GCM(void)
 	fflush(stdout);
 }
 
+/* Run the EAX known-answer tests with each AES CTR + CBC-MAC vtable. */
+static void
+test_EAX(void)
+{
+	const br_block_ctrcbc_class *opt_vt;
+
+	test_EAX_inner("aes_big", &br_aes_big_ctrcbc_vtable);
+	test_EAX_inner("aes_small", &br_aes_small_ctrcbc_vtable);
+	test_EAX_inner("aes_ct", &br_aes_ct_ctrcbc_vtable);
+	test_EAX_inner("aes_ct64", &br_aes_ct64_ctrcbc_vtable);
+	opt_vt = br_aes_x86ni_ctrcbc_get_vtable();
+	if (opt_vt == NULL) {
+		printf("Test EAX aes_x86ni: UNAVAILABLE\n");
+		return;
+	}
+	test_EAX_inner("aes_x86ni", opt_vt);
+}
+
+/*
+ * CCM known-answer vectors, from NIST SP 800-38C, appendix C. The CCM
+ * specification concatenates the authentication tag at the end of the
+ * ciphertext; in our API and the vectors below, the tag is separate.
+ * Order is: key, nonce, aad, plaintext, ciphertext, tag (hexadecimal).
+ * A NULL aad stands for a 65536-byte AAD in which byte number v has
+ * value (unsigned char)v; a NULL key terminates the list.
+ */
+static const char *const KAT_CCM[] = {
+	"404142434445464748494a4b4c4d4e4f",
+	"10111213141516",
+	"0001020304050607",
+	"20212223",
+	"7162015b",
+	"4dac255d",
+
+	"404142434445464748494a4b4c4d4e4f",
+	"1011121314151617",
+	"000102030405060708090a0b0c0d0e0f",
+	"202122232425262728292a2b2c2d2e2f",
+	"d2a1f0e051ea5f62081a7792073d593d",
+	"1fc64fbfaccd",
+
+	"404142434445464748494a4b4c4d4e4f",
+	"101112131415161718191a1b",
+	"000102030405060708090a0b0c0d0e0f10111213",
+	"202122232425262728292a2b2c2d2e2f3031323334353637",
+	"e3b201a9f5b71a7a9b1ceaeccd97e70b6176aad9a4428aa5",
+	"484392fbc1b09951",
+
+	"404142434445464748494a4b4c4d4e4f",
+	"101112131415161718191a1b1c",
+	NULL,
+	"202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f",
+	"69915dad1e84c6376a68c2967e4dab615ae0fd1faec44cc484828529463ccf72",
+	"b4ac6bec93e8598e7f0dadbcea5b",
+
+	NULL
+};
+
+/*
+ * Run every vector of KAT_CCM through CCM, using the block cipher
+ * implementation vt; name is only used in the progress message.
+ * A failed length, tag or overflow check ends the process.
+ */
+static void
+test_CCM_inner(const char *name, const br_block_ctrcbc_class *vt)
+{
+	size_t u;
+
+	printf("Test CCM %s: ", name);
+	fflush(stdout);
+
+	for (u = 0; KAT_CCM[u]; u += 6) {  /* six strings per vector */
+		unsigned char plain[100];
+		unsigned char key[32];
+		unsigned char nonce[100];
+		unsigned char aad_buf[100], *aad;
+		unsigned char cipher[100];
+		unsigned char tag[100];
+		size_t plain_len, key_len, nonce_len, aad_len, tag_len;
+		br_aes_gen_ctrcbc_keys bc;
+		br_ccm_context ec;
+		unsigned char tmp[100], out[16];
+		size_t v;
+
+		key_len = hextobin(key, KAT_CCM[u]);
+		nonce_len = hextobin(nonce, KAT_CCM[u + 1]);
+		/* A NULL AAD entry stands for a 65536-byte generated AAD. */
+		if (KAT_CCM[u + 2] == NULL) {
+			aad_len = 65536;
+			aad = malloc(aad_len);
+			if (aad == NULL) {
+				fprintf(stderr, "OOM error\n");
+				exit(EXIT_FAILURE);
+			}
+			for (v = 0; v < 65536; v ++) {
+				aad[v] = (unsigned char)v;
+			}
+		} else {
+			aad = aad_buf;
+			aad_len = hextobin(aad, KAT_CCM[u + 2]);
+		}
+		plain_len = hextobin(plain, KAT_CCM[u + 3]);
+		hextobin(cipher, KAT_CCM[u + 4]);
+		tag_len = hextobin(tag, KAT_CCM[u + 5]);
+
+		vt->init(&bc.vtable, key, key_len);
+		br_ccm_init(&ec, &bc.vtable);
+
+		memset(tmp, 0x54, sizeof tmp);  /* sentinel bytes */
+
+		/* Basic operation: one-shot encryption, then decryption. */
+		memcpy(tmp, plain, plain_len);
+		if (!br_ccm_reset(&ec, nonce, nonce_len,
+			aad_len, plain_len, tag_len))
+		{
+			fprintf(stderr, "CCM reset failed\n");
+			exit(EXIT_FAILURE);
+		}
+		br_ccm_aad_inject(&ec, aad, aad_len);
+		br_ccm_flip(&ec);
+		br_ccm_run(&ec, 1, tmp, plain_len);  /* encrypt in place */
+		if (br_ccm_get_tag(&ec, out) != tag_len) {
+			fprintf(stderr, "CCM returned wrong tag length\n");
+			exit(EXIT_FAILURE);
+		}
+		check_equals("KAT CCM 1", tmp, cipher, plain_len);
+		check_equals("KAT CCM 2", out, tag, tag_len);
+
+		br_ccm_reset(&ec, nonce, nonce_len,
+			aad_len, plain_len, tag_len);
+		br_ccm_aad_inject(&ec, aad, aad_len);
+		br_ccm_flip(&ec);
+		br_ccm_run(&ec, 0, tmp, plain_len);  /* decrypt in place */
+		check_equals("KAT CCM 3", tmp, plain, plain_len);
+		if (!br_ccm_check_tag(&ec, tag)) {
+			fprintf(stderr, "Tag not verified (1)\n");
+			exit(EXIT_FAILURE);
+		}
+
+		for (v = plain_len; v < sizeof tmp; v ++) {
+			if (tmp[v] != 0x54) {  /* sentinel overwritten */
+				fprintf(stderr, "overflow on data\n");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		/* Byte-by-byte injection (tmp holds the plaintext here). */
+		br_ccm_reset(&ec, nonce, nonce_len,
+			aad_len, plain_len, tag_len);
+		for (v = 0; v < aad_len; v ++) {
+			br_ccm_aad_inject(&ec, aad + v, 1);
+		}
+		br_ccm_flip(&ec);
+		for (v = 0; v < plain_len; v ++) {
+			br_ccm_run(&ec, 1, tmp + v, 1);
+		}
+		check_equals("KAT CCM 4", tmp, cipher, plain_len);
+		if (!br_ccm_check_tag(&ec, tag)) {
+			fprintf(stderr, "Tag not verified (2)\n");
+			exit(EXIT_FAILURE);
+		}
+
+		br_ccm_reset(&ec, nonce, nonce_len,
+			aad_len, plain_len, tag_len);
+		for (v = 0; v < aad_len; v ++) {
+			br_ccm_aad_inject(&ec, aad + v, 1);
+		}
+		br_ccm_flip(&ec);
+		for (v = 0; v < plain_len; v ++) {
+			br_ccm_run(&ec, 0, tmp + v, 1);
+		}
+		br_ccm_get_tag(&ec, out);
+		check_equals("KAT CCM 5", tmp, plain, plain_len);
+		check_equals("KAT CCM 6", out, tag, tag_len);
+
+		/* Check that alterations of the AAD are detected. */
+		for (v = 0; v < aad_len; v ++) {
+			memcpy(tmp, cipher, plain_len);
+			br_ccm_reset(&ec, nonce, nonce_len,
+				aad_len, plain_len, tag_len);
+			aad[v] ^= 0x04;  /* flip one bit of the AAD */
+			br_ccm_aad_inject(&ec, aad, aad_len);
+			aad[v] ^= 0x04;  /* restore the original AAD */
+			br_ccm_flip(&ec);
+			br_ccm_run(&ec, 0, tmp, plain_len);
+			check_equals("KAT CCM 7", tmp, plain, plain_len);
+			if (br_ccm_check_tag(&ec, tag)) {
+				fprintf(stderr, "Tag should have changed\n");
+				exit(EXIT_FAILURE);
+			}
+
+			/*
+			 * When the AAD is really big, we don't want to do
+			 * the complete quadratic operation.
+			 */
+			if (v >= 32) {
+				break;
+			}
+		}
+
+		if (aad != aad_buf) {  /* AAD came from malloc() */
+			free(aad);
+		}
+
+		printf(".");
+		fflush(stdout);
+	}
+
+	printf(" done.\n");
+	fflush(stdout);
+}
+
+/* Run the CCM known-answer tests with each AES CTR + CBC-MAC vtable. */
+static void
+test_CCM(void)
+{
+	const br_block_ctrcbc_class *opt_vt;
+
+	test_CCM_inner("aes_big", &br_aes_big_ctrcbc_vtable);
+	test_CCM_inner("aes_small", &br_aes_small_ctrcbc_vtable);
+	test_CCM_inner("aes_ct", &br_aes_ct_ctrcbc_vtable);
+	test_CCM_inner("aes_ct64", &br_aes_ct64_ctrcbc_vtable);
+	opt_vt = br_aes_x86ni_ctrcbc_get_vtable();
+	if (opt_vt == NULL) {
+		printf("Test CCM aes_x86ni: UNAVAILABLE\n");
+		return;
+	}
+	test_CCM_inner("aes_x86ni", opt_vt);
+}
+
 static void
 test_EC_inner(const char *sk, const char *sU,
 	const br_ec_impl *impl, int curve)
@@ -6201,6 +6911,11 @@ static const struct {
 	STU(AES_ct64),
 	STU(AES_pwr8),
 	STU(AES_x86ni),
+	STU(AES_CTRCBC_big),
+	STU(AES_CTRCBC_small),
+	STU(AES_CTRCBC_ct),
+	STU(AES_CTRCBC_ct64),
+	STU(AES_CTRCBC_x86ni),
 	STU(DES_tab),
 	STU(DES_ct),
 	STU(ChaCha20_ct),
@@ -6218,6 +6933,8 @@ static const struct {
 	STU(GHASH_ctmul64),
 	STU(GHASH_pclmul),
 	STU(GHASH_pwr8),
+	STU(CCM),
+	STU(EAX),
 	STU(GCM),
 	STU(EC_prime_i15),
 	STU(EC_prime_i31),
diff --git a/test/test_speed.c b/test/test_speed.c
index 296e914..245a840 100644
--- a/test/test_speed.c
+++ b/test/test_speed.c
@@ -443,6 +443,130 @@ test_speed_poly1305_i15(void)
 	test_speed_poly1305_inner("Poly1305 (i15)", &br_poly1305_i15_run);
 }
 
+/*
+ * Measure the throughput of EAX encryption with the block cipher
+ * implementation vt, and print it in MB/s.
+ *
+ *   name      label printed in front of the measured speed
+ *   vt        AES CTR + CBC-MAC implementation to benchmark
+ *   key_len   key length in bytes (the key buffer holds at most 32)
+ *
+ * Each iteration performs the whole sequence: key schedule, EAX
+ * initialisation, nonce and AAD processing, encryption of an 8192-byte
+ * buffer, and tag computation. The iteration count is doubled until a
+ * run takes at least two seconds as measured with clock().
+ */
+static void
+test_speed_eax_inner(char *name,
+	const br_block_ctrcbc_class *vt, size_t key_len)
+{
+	unsigned char buf[8192], key[32], nonce[16], aad[16], tag[16];
+	int i;
+	long num;
+	br_aes_gen_ctrcbc_keys ac;
+	br_eax_context ec;
+
+	memset(key, 'K', key_len);
+	memset(nonce, 'N', sizeof nonce);
+	memset(aad, 'A', sizeof aad);
+	memset(buf, 'T', sizeof buf);
+
+	/*
+	 * A few untimed iterations (presumably a warm-up).
+	 */
+	for (i = 0; i < 10; i ++) {
+		vt->init(&ac.vtable, key, key_len);
+		br_eax_init(&ec, &ac.vtable);
+		br_eax_reset(&ec, nonce, sizeof nonce);
+		br_eax_aad_inject(&ec, aad, sizeof aad);
+		br_eax_flip(&ec);
+		br_eax_run(&ec, 1, buf, sizeof buf);
+		br_eax_get_tag(&ec, tag);
+	}
+
+	/*
+	 * Timed runs: double the iteration count until a run is long
+	 * enough to yield a meaningful measurement.
+	 */
+	num = 10;
+	for (;;) {
+		clock_t begin, end;
+		double tt;
+		long k;
+
+		begin = clock();
+		for (k = num; k > 0; k --) {
+			vt->init(&ac.vtable, key, key_len);
+			br_eax_init(&ec, &ac.vtable);
+			br_eax_reset(&ec, nonce, sizeof nonce);
+			br_eax_aad_inject(&ec, aad, sizeof aad);
+			br_eax_flip(&ec);
+			br_eax_run(&ec, 1, buf, sizeof buf);
+			br_eax_get_tag(&ec, tag);
+		}
+		end = clock();
+		tt = (double)(end - begin) / CLOCKS_PER_SEC;
+		if (tt >= 2.0) {
+			printf("%-30s %8.2f MB/s\n", name,
+				((double)sizeof buf) * (double)num
+				/ (tt * 1000000.0));
+			fflush(stdout);
+			return;
+		}
+		num <<= 1;
+	}
+}
+
+/*
+ * Define test_speed_eax_<algo><keysize>_<impl>() for an implementation
+ * whose vtable is always available: the vtable is referenced directly.
+ */
+#define SPEED_EAX(Algo, algo, keysize, impl) \
+static void \
+test_speed_eax_ ## algo ## keysize ## _ ## impl(void) \
+{ \
+	test_speed_eax_inner("EAX " #Algo "-" #keysize "(" #impl ")", \
+		&br_ ## algo ## _ ## impl ##  _ctrcbc_vtable, (keysize) >> 3); \
+}
+
+/*
+ * Same as SPEED_EAX(), for an implementation that may be unavailable
+ * on the current system: the vtable is obtained at runtime from
+ * br_<algo>_<impl>_ctrcbc_get_vtable(), and the benchmark is reported
+ * as unavailable when that function returns NULL.
+ */
+#define SPEED_EAX_OPT(Algo, algo, keysize, impl) \
+static void \
+test_speed_eax_ ## algo ## keysize ## _ ## impl(void) \
+{ \
+	const br_block_ctrcbc_class *vt; \
+	vt = br_ ## algo ## _ ## impl ## _ctrcbc_get_vtable(); \
+	if (vt != NULL) { \
+		test_speed_eax_inner("EAX " #Algo "-" \
+			#keysize "(" #impl ")", vt, (keysize) >> 3); \
+	} else { \
+		printf("%-30s UNAVAILABLE\n", "EAX " #Algo "-" \
+			#keysize "(" #impl ")"); \
+		fflush(stdout); \
+	} \
+}
+
+SPEED_EAX(AES, aes, 128, big)
+SPEED_EAX(AES, aes, 128, small)
+SPEED_EAX(AES, aes, 128, ct)
+SPEED_EAX(AES, aes, 128, ct64)
+SPEED_EAX_OPT(AES, aes, 128, x86ni)
+SPEED_EAX(AES, aes, 192, big)
+SPEED_EAX(AES, aes, 192, small)
+SPEED_EAX(AES, aes, 192, ct)
+SPEED_EAX(AES, aes, 192, ct64)
+SPEED_EAX_OPT(AES, aes, 192, x86ni)
+SPEED_EAX(AES, aes, 256, big)
+SPEED_EAX(AES, aes, 256, small)
+SPEED_EAX(AES, aes, 256, ct)
+SPEED_EAX(AES, aes, 256, ct64)
+SPEED_EAX_OPT(AES, aes, 256, x86ni)
+
 static const unsigned char RSA_N[] = {
 	0xE9, 0xF2, 0x4A, 0x2F, 0x96, 0xDF, 0x0A, 0x23,
 	0x01, 0x85, 0xF1, 0x2C, 0xB2, 0xA8, 0xEF, 0x23,
@@ -1300,6 +1424,22 @@ static const struct {
 	STU(poly1305_ctmulq),
 	STU(poly1305_i15),
 
+	STU(eax_aes128_big),
+	STU(eax_aes192_big),
+	STU(eax_aes256_big),
+	STU(eax_aes128_small),
+	STU(eax_aes192_small),
+	STU(eax_aes256_small),
+	STU(eax_aes128_ct),
+	STU(eax_aes192_ct),
+	STU(eax_aes256_ct),
+	STU(eax_aes128_ct64),
+	STU(eax_aes192_ct64),
+	STU(eax_aes256_ct64),
+	STU(eax_aes128_x86ni),
+	STU(eax_aes192_x86ni),
+	STU(eax_aes256_x86ni),
+
 	STU(rsa_i15),
 	STU(rsa_i31),
 	STU(rsa_i32),