2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 #define BR_POWER_ASM_MACROS 1
30 /* see bearssl_block.h */
32 br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys
*ctx
,
33 const void *key
, size_t len
)
35 ctx
->vtable
= &br_aes_pwr8_cbcenc_vtable
;
36 ctx
->num_rounds
= br_aes_pwr8_keysched(ctx
->skey
.skni
, key
, len
);
40 cbcenc_128(const unsigned char *sk
,
41 const unsigned char *iv
, unsigned char *buf
, size_t len
)
46 static const uint32_t idx2be
[] = {
47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
55 * Load subkeys into v0..v10
57 lxvw4x(32, %[cc
], %[sk
])
58 addi(%[cc
], %[cc
], 16)
59 lxvw4x(33, %[cc
], %[sk
])
60 addi(%[cc
], %[cc
], 16)
61 lxvw4x(34, %[cc
], %[sk
])
62 addi(%[cc
], %[cc
], 16)
63 lxvw4x(35, %[cc
], %[sk
])
64 addi(%[cc
], %[cc
], 16)
65 lxvw4x(36, %[cc
], %[sk
])
66 addi(%[cc
], %[cc
], 16)
67 lxvw4x(37, %[cc
], %[sk
])
68 addi(%[cc
], %[cc
], 16)
69 lxvw4x(38, %[cc
], %[sk
])
70 addi(%[cc
], %[cc
], 16)
71 lxvw4x(39, %[cc
], %[sk
])
72 addi(%[cc
], %[cc
], 16)
73 lxvw4x(40, %[cc
], %[sk
])
74 addi(%[cc
], %[cc
], 16)
75 lxvw4x(41, %[cc
], %[sk
])
76 addi(%[cc
], %[cc
], 16)
77 lxvw4x(42, %[cc
], %[sk
])
81 * v15 = constant for byteswapping words
83 lxvw4x(47, 0, %[idx2be
])
96 * Load next plaintext word and XOR with current IV.
100 vperm(17, 17, 17, 15)
117 vcipherlast(16, 16, 10)
120 * Store back result (with byteswap)
123 vperm(17, 16, 16, 15)
124 stxvw4x(49, 0, %[buf
])
126 stxvw4x(48, 0, %[buf
])
128 addi(%[buf
], %[buf
], 16)
132 : [cc
] "+b" (cc
), [buf
] "+b" (buf
)
133 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (len
>> 4)
135 , [idx2be
] "b" (idx2be
)
137 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
138 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
144 cbcenc_192(const unsigned char *sk
,
145 const unsigned char *iv
, unsigned char *buf
, size_t len
)
150 static const uint32_t idx2be
[] = {
151 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
159 * Load subkeys into v0..v12
161 lxvw4x(32, %[cc
], %[sk
])
162 addi(%[cc
], %[cc
], 16)
163 lxvw4x(33, %[cc
], %[sk
])
164 addi(%[cc
], %[cc
], 16)
165 lxvw4x(34, %[cc
], %[sk
])
166 addi(%[cc
], %[cc
], 16)
167 lxvw4x(35, %[cc
], %[sk
])
168 addi(%[cc
], %[cc
], 16)
169 lxvw4x(36, %[cc
], %[sk
])
170 addi(%[cc
], %[cc
], 16)
171 lxvw4x(37, %[cc
], %[sk
])
172 addi(%[cc
], %[cc
], 16)
173 lxvw4x(38, %[cc
], %[sk
])
174 addi(%[cc
], %[cc
], 16)
175 lxvw4x(39, %[cc
], %[sk
])
176 addi(%[cc
], %[cc
], 16)
177 lxvw4x(40, %[cc
], %[sk
])
178 addi(%[cc
], %[cc
], 16)
179 lxvw4x(41, %[cc
], %[sk
])
180 addi(%[cc
], %[cc
], 16)
181 lxvw4x(42, %[cc
], %[sk
])
182 addi(%[cc
], %[cc
], 16)
183 lxvw4x(43, %[cc
], %[sk
])
184 addi(%[cc
], %[cc
], 16)
185 lxvw4x(44, %[cc
], %[sk
])
189 * v15 = constant for byteswapping words
191 lxvw4x(47, 0, %[idx2be
])
198 vperm(16, 16, 16, 15)
204 * Load next plaintext word and XOR with current IV.
206 lxvw4x(49, 0, %[buf
])
208 vperm(17, 17, 17, 15)
227 vcipherlast(16, 16, 12)
230 * Store back result (with byteswap)
233 vperm(17, 16, 16, 15)
234 stxvw4x(49, 0, %[buf
])
236 stxvw4x(48, 0, %[buf
])
238 addi(%[buf
], %[buf
], 16)
242 : [cc
] "+b" (cc
), [buf
] "+b" (buf
)
243 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (len
>> 4)
245 , [idx2be
] "b" (idx2be
)
247 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
248 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
254 cbcenc_256(const unsigned char *sk
,
255 const unsigned char *iv
, unsigned char *buf
, size_t len
)
260 static const uint32_t idx2be
[] = {
261 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
269 * Load subkeys into v0..v14
271 lxvw4x(32, %[cc
], %[sk
])
272 addi(%[cc
], %[cc
], 16)
273 lxvw4x(33, %[cc
], %[sk
])
274 addi(%[cc
], %[cc
], 16)
275 lxvw4x(34, %[cc
], %[sk
])
276 addi(%[cc
], %[cc
], 16)
277 lxvw4x(35, %[cc
], %[sk
])
278 addi(%[cc
], %[cc
], 16)
279 lxvw4x(36, %[cc
], %[sk
])
280 addi(%[cc
], %[cc
], 16)
281 lxvw4x(37, %[cc
], %[sk
])
282 addi(%[cc
], %[cc
], 16)
283 lxvw4x(38, %[cc
], %[sk
])
284 addi(%[cc
], %[cc
], 16)
285 lxvw4x(39, %[cc
], %[sk
])
286 addi(%[cc
], %[cc
], 16)
287 lxvw4x(40, %[cc
], %[sk
])
288 addi(%[cc
], %[cc
], 16)
289 lxvw4x(41, %[cc
], %[sk
])
290 addi(%[cc
], %[cc
], 16)
291 lxvw4x(42, %[cc
], %[sk
])
292 addi(%[cc
], %[cc
], 16)
293 lxvw4x(43, %[cc
], %[sk
])
294 addi(%[cc
], %[cc
], 16)
295 lxvw4x(44, %[cc
], %[sk
])
296 addi(%[cc
], %[cc
], 16)
297 lxvw4x(45, %[cc
], %[sk
])
298 addi(%[cc
], %[cc
], 16)
299 lxvw4x(46, %[cc
], %[sk
])
303 * v15 = constant for byteswapping words
305 lxvw4x(47, 0, %[idx2be
])
312 vperm(16, 16, 16, 15)
318 * Load next plaintext word and XOR with current IV.
320 lxvw4x(49, 0, %[buf
])
322 vperm(17, 17, 17, 15)
343 vcipherlast(16, 16, 14)
346 * Store back result (with byteswap)
349 vperm(17, 16, 16, 15)
350 stxvw4x(49, 0, %[buf
])
352 stxvw4x(48, 0, %[buf
])
354 addi(%[buf
], %[buf
], 16)
358 : [cc
] "+b" (cc
), [buf
] "+b" (buf
)
359 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (len
>> 4)
361 , [idx2be
] "b" (idx2be
)
363 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
364 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
369 /* see bearssl_block.h */
371 br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys
*ctx
,
372 void *iv
, void *data
, size_t len
)
375 switch (ctx
->num_rounds
) {
377 cbcenc_128(ctx
->skey
.skni
, iv
, data
, len
);
380 cbcenc_192(ctx
->skey
.skni
, iv
, data
, len
);
383 cbcenc_256(ctx
->skey
.skni
, iv
, data
, len
);
386 memcpy(iv
, (unsigned char *)data
+ (len
- 16), 16);
390 /* see bearssl_block.h */
391 const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable
= {
392 sizeof(br_aes_pwr8_cbcenc_keys
),
395 (void (*)(const br_block_cbcenc_class
**, const void *, size_t))
396 &br_aes_pwr8_cbcenc_init
,
397 (void (*)(const br_block_cbcenc_class
*const *, void *, void *, size_t))
398 &br_aes_pwr8_cbcenc_run
401 /* see bearssl_block.h */
402 const br_block_cbcenc_class
*
403 br_aes_pwr8_cbcenc_get_vtable(void)
405 return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable
: NULL
;
410 /* see bearssl_block.h */
411 const br_block_cbcenc_class
*
412 br_aes_pwr8_cbcenc_get_vtable(void)