[tomcrypt] replace AES implementation with mbedtls

HW accelerated versions provided for x86_64 and arm64, with compile time
guards and runtime detection

needed for https://github.com/status-im/status-desktop/issues/10726
This commit is contained in:
Lukáš Tinkl 2023-05-23 11:00:36 +02:00
parent cfa6dd9438
commit 97fb545043
No known key found for this signature in database
GPG Key ID: 4ABB993B9382F296
9 changed files with 1828 additions and 1530 deletions

526
aes.c
View File

@ -9,22 +9,6 @@
* Tom St Denis, tomstdenis@gmail.com, http://libtom.org
*/
/* AES implementation by Tom St Denis
*
* Derived from the Public Domain source code by
---
* rijndael-alg-fst.c
*
* @version 3.0 (December 2000)
*
* Optimised ANSI C code for the Rijndael cipher (now AES)
*
* @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
* @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
* @author Paulo Barreto <paulo.barreto@terra.com.br>
---
*/
/**
@file aes.c
Implementation of AES
@ -34,7 +18,12 @@
#ifdef LTC_RIJNDAEL
#ifndef ENCRYPT_ONLY
#include "mbtls_aes.h"
static mbedtls_aes_context ctx_encrypt;
#ifndef ENCRYPT_ONLY
static mbedtls_aes_context ctx_decrypt;
#define SETUP rijndael_setup
#define ECB_ENC rijndael_ecb_encrypt
@ -86,28 +75,6 @@ const struct ltc_cipher_descriptor aes_enc_desc =
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
#endif
#include "aes_tab.h"
static ulong32 setup_mix(ulong32 temp)
{
return (Te4_3[byte(temp, 2)]) ^
(Te4_2[byte(temp, 1)]) ^
(Te4_1[byte(temp, 0)]) ^
(Te4_0[byte(temp, 3)]);
}
#ifndef ENCRYPT_ONLY
#ifdef LTC_SMALL_CODE
static ulong32 setup_mix2(ulong32 temp)
{
return Td0(255 & Te4[byte(temp, 3)]) ^
Td1(255 & Te4[byte(temp, 2)]) ^
Td2(255 & Te4[byte(temp, 1)]) ^
Td3(255 & Te4[byte(temp, 0)]);
}
#endif
#endif
/**
@ -120,11 +87,6 @@ static ulong32 setup_mix2(ulong32 temp)
*/
int SETUP(const unsigned char *key, int keylen, int num_rounds, symmetric_key *skey)
{
int i, j;
ulong32 temp, *rk;
#ifndef ENCRYPT_ONLY
ulong32 *rrk;
#endif
LTC_ARGCHK(key != NULL);
LTC_ARGCHK(skey != NULL);
@ -135,143 +97,20 @@ int SETUP(const unsigned char *key, int keylen, int num_rounds, symmetric_key *s
if (num_rounds != 0 && num_rounds != (10 + ((keylen/8)-2)*2)) {
return CRYPT_INVALID_ROUNDS;
}
skey->rijndael.Nr = 10 + ((keylen/8)-2)*2;
/* setup the forward key */
i = 0;
rk = skey->rijndael.eK;
LOAD32H(rk[0], key );
LOAD32H(rk[1], key + 4);
LOAD32H(rk[2], key + 8);
LOAD32H(rk[3], key + 12);
if (keylen == 16) {
j = 44;
for (;;) {
temp = rk[3];
rk[4] = rk[0] ^ setup_mix(temp) ^ rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
rk[7] = rk[3] ^ rk[6];
if (++i == 10) {
break;
}
rk += 4;
}
} else if (keylen == 24) {
j = 52;
LOAD32H(rk[4], key + 16);
LOAD32H(rk[5], key + 20);
for (;;) {
#ifdef _MSC_VER
temp = skey->rijndael.eK[rk - skey->rijndael.eK + 5];
#else
temp = rk[5];
#endif
rk[ 6] = rk[ 0] ^ setup_mix(temp) ^ rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
rk[ 9] = rk[ 3] ^ rk[ 8];
if (++i == 8) {
break;
}
rk[10] = rk[ 4] ^ rk[ 9];
rk[11] = rk[ 5] ^ rk[10];
rk += 6;
}
} else if (keylen == 32) {
j = 60;
LOAD32H(rk[4], key + 16);
LOAD32H(rk[5], key + 20);
LOAD32H(rk[6], key + 24);
LOAD32H(rk[7], key + 28);
for (;;) {
#ifdef _MSC_VER
temp = skey->rijndael.eK[rk - skey->rijndael.eK + 7];
#else
temp = rk[7];
#endif
rk[ 8] = rk[ 0] ^ setup_mix(temp) ^ rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
rk[11] = rk[ 3] ^ rk[10];
if (++i == 7) {
break;
}
temp = rk[11];
rk[12] = rk[ 4] ^ setup_mix(RORc(temp, 8));
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
rk += 8;
}
} else {
/* this can't happen */
return CRYPT_ERROR;
}
#ifndef ENCRYPT_ONLY
/* setup the inverse key now */
rk = skey->rijndael.dK;
rrk = skey->rijndael.eK + j - 4;
/* apply the inverse MixColumn transform to all round keys but the first and the last: */
/* copy first */
*rk++ = *rrk++;
*rk++ = *rrk++;
*rk++ = *rrk++;
*rk = *rrk;
rk -= 3; rrk -= 3;
for (i = 1; i < skey->rijndael.Nr; i++) {
rrk -= 4;
rk += 4;
#ifdef LTC_SMALL_CODE
temp = rrk[0];
rk[0] = setup_mix2(temp);
temp = rrk[1];
rk[1] = setup_mix2(temp);
temp = rrk[2];
rk[2] = setup_mix2(temp);
temp = rrk[3];
rk[3] = setup_mix2(temp);
#else
temp = rrk[0];
rk[0] =
Tks0[byte(temp, 3)] ^
Tks1[byte(temp, 2)] ^
Tks2[byte(temp, 1)] ^
Tks3[byte(temp, 0)];
temp = rrk[1];
rk[1] =
Tks0[byte(temp, 3)] ^
Tks1[byte(temp, 2)] ^
Tks2[byte(temp, 1)] ^
Tks3[byte(temp, 0)];
temp = rrk[2];
rk[2] =
Tks0[byte(temp, 3)] ^
Tks1[byte(temp, 2)] ^
Tks2[byte(temp, 1)] ^
Tks3[byte(temp, 0)];
temp = rrk[3];
rk[3] =
Tks0[byte(temp, 3)] ^
Tks1[byte(temp, 2)] ^
Tks2[byte(temp, 1)] ^
Tks3[byte(temp, 0)];
#endif
}
mbedtls_aes_init(&ctx_encrypt);
if (mbedtls_aes_setkey_enc(&ctx_encrypt, key, keylen*8) != 0)
return CRYPT_INVALID_KEYSIZE;
memcpy(skey->rijndael.eK, ctx_encrypt.buf, sizeof(skey->rijndael.eK));
/* copy last */
rrk -= 4;
rk += 4;
*rk++ = *rrk++;
*rk++ = *rrk++;
*rk++ = *rrk++;
*rk = *rrk;
#endif /* ENCRYPT_ONLY */
#ifndef ENCRYPT_ONLY
mbedtls_aes_init(&ctx_decrypt);
if (mbedtls_aes_setkey_dec(&ctx_decrypt, key, keylen*8) != 0)
return CRYPT_INVALID_KEYSIZE;
memcpy(skey->rijndael.dK, ctx_decrypt.buf, sizeof(skey->rijndael.dK));
#endif
skey->rijndael.Nr = ctx_encrypt.nr;
return CRYPT_OK;
}
@ -284,172 +123,26 @@ int SETUP(const unsigned char *key, int keylen, int num_rounds, symmetric_key *s
@return CRYPT_OK if successful
*/
#ifdef LTC_CLEAN_STACK
static int _rijndael_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
static int _rijndael_ecb_encrypt(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
#else
int ECB_ENC(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
#endif
{
ulong32 s0, s1, s2, s3, t0, t1, t2, t3, *rk;
int Nr, r;
LTC_ARGCHK(pt != NULL);
LTC_ARGCHK(ct != NULL);
LTC_ARGCHK(skey != NULL);
Nr = skey->rijndael.Nr;
rk = skey->rijndael.eK;
/*
* map byte array block to cipher state
* and add initial round key:
*/
LOAD32H(s0, pt ); s0 ^= rk[0];
LOAD32H(s1, pt + 4); s1 ^= rk[1];
LOAD32H(s2, pt + 8); s2 ^= rk[2];
LOAD32H(s3, pt + 12); s3 ^= rk[3];
#ifdef LTC_SMALL_CODE
ctx_encrypt.nr = skey->rijndael.Nr;
memset(ctx_encrypt.buf, 0, sizeof(ctx_encrypt.buf));
memcpy(ctx_encrypt.buf, skey->rijndael.eK, sizeof(skey->rijndael.eK));
for (r = 0; ; r++) {
rk += 4;
t0 =
Te0(byte(s0, 3)) ^
Te1(byte(s1, 2)) ^
Te2(byte(s2, 1)) ^
Te3(byte(s3, 0)) ^
rk[0];
t1 =
Te0(byte(s1, 3)) ^
Te1(byte(s2, 2)) ^
Te2(byte(s3, 1)) ^
Te3(byte(s0, 0)) ^
rk[1];
t2 =
Te0(byte(s2, 3)) ^
Te1(byte(s3, 2)) ^
Te2(byte(s0, 1)) ^
Te3(byte(s1, 0)) ^
rk[2];
t3 =
Te0(byte(s3, 3)) ^
Te1(byte(s0, 2)) ^
Te2(byte(s1, 1)) ^
Te3(byte(s2, 0)) ^
rk[3];
if (r == Nr-2) {
break;
}
s0 = t0; s1 = t1; s2 = t2; s3 = t3;
}
rk += 4;
#else
/*
* Nr - 1 full rounds:
*/
r = Nr >> 1;
for (;;) {
t0 =
Te0(byte(s0, 3)) ^
Te1(byte(s1, 2)) ^
Te2(byte(s2, 1)) ^
Te3(byte(s3, 0)) ^
rk[4];
t1 =
Te0(byte(s1, 3)) ^
Te1(byte(s2, 2)) ^
Te2(byte(s3, 1)) ^
Te3(byte(s0, 0)) ^
rk[5];
t2 =
Te0(byte(s2, 3)) ^
Te1(byte(s3, 2)) ^
Te2(byte(s0, 1)) ^
Te3(byte(s1, 0)) ^
rk[6];
t3 =
Te0(byte(s3, 3)) ^
Te1(byte(s0, 2)) ^
Te2(byte(s1, 1)) ^
Te3(byte(s2, 0)) ^
rk[7];
rk += 8;
if (--r == 0) {
break;
}
s0 =
Te0(byte(t0, 3)) ^
Te1(byte(t1, 2)) ^
Te2(byte(t2, 1)) ^
Te3(byte(t3, 0)) ^
rk[0];
s1 =
Te0(byte(t1, 3)) ^
Te1(byte(t2, 2)) ^
Te2(byte(t3, 1)) ^
Te3(byte(t0, 0)) ^
rk[1];
s2 =
Te0(byte(t2, 3)) ^
Te1(byte(t3, 2)) ^
Te2(byte(t0, 1)) ^
Te3(byte(t1, 0)) ^
rk[2];
s3 =
Te0(byte(t3, 3)) ^
Te1(byte(t0, 2)) ^
Te2(byte(t1, 1)) ^
Te3(byte(t2, 0)) ^
rk[3];
}
#endif
/*
* apply last round and
* map cipher state to byte array block:
*/
s0 =
(Te4_3[byte(t0, 3)]) ^
(Te4_2[byte(t1, 2)]) ^
(Te4_1[byte(t2, 1)]) ^
(Te4_0[byte(t3, 0)]) ^
rk[0];
STORE32H(s0, ct);
s1 =
(Te4_3[byte(t1, 3)]) ^
(Te4_2[byte(t2, 2)]) ^
(Te4_1[byte(t3, 1)]) ^
(Te4_0[byte(t0, 0)]) ^
rk[1];
STORE32H(s1, ct+4);
s2 =
(Te4_3[byte(t2, 3)]) ^
(Te4_2[byte(t3, 2)]) ^
(Te4_1[byte(t0, 1)]) ^
(Te4_0[byte(t1, 0)]) ^
rk[2];
STORE32H(s2, ct+8);
s3 =
(Te4_3[byte(t3, 3)]) ^
(Te4_2[byte(t0, 2)]) ^
(Te4_1[byte(t1, 1)]) ^
(Te4_0[byte(t2, 0)]) ^
rk[3];
STORE32H(s3, ct+12);
return CRYPT_OK;
return mbedtls_aes_crypt_ecb(&ctx_encrypt, MBEDTLS_AES_ENCRYPT, pt, ct) == 0 ? CRYPT_OK : CRYPT_ERROR;
}
#ifdef LTC_CLEAN_STACK
int ECB_ENC(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
int ECB_ENC(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
{
int err = _rijndael_ecb_encrypt(pt, ct, skey);
burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);
return err;
return _rijndael_ecb_encrypt(pt, ct, skey);
}
#endif
@ -463,173 +156,27 @@ int ECB_ENC(const unsigned char *pt, unsigned char *ct, symmetric_key *skey)
@return CRYPT_OK if successful
*/
#ifdef LTC_CLEAN_STACK
static int _rijndael_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
static int _rijndael_ecb_decrypt(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
#else
int ECB_DEC(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
#endif
{
ulong32 s0, s1, s2, s3, t0, t1, t2, t3, *rk;
int Nr, r;
LTC_ARGCHK(pt != NULL);
LTC_ARGCHK(ct != NULL);
LTC_ARGCHK(skey != NULL);
Nr = skey->rijndael.Nr;
rk = skey->rijndael.dK;
/*
* map byte array block to cipher state
* and add initial round key:
*/
LOAD32H(s0, ct ); s0 ^= rk[0];
LOAD32H(s1, ct + 4); s1 ^= rk[1];
LOAD32H(s2, ct + 8); s2 ^= rk[2];
LOAD32H(s3, ct + 12); s3 ^= rk[3];
ctx_decrypt.nr = skey->rijndael.Nr;
memset(ctx_decrypt.buf, 0, sizeof(ctx_decrypt.buf));
memcpy(ctx_decrypt.buf, skey->rijndael.dK, sizeof(skey->rijndael.dK));
#ifdef LTC_SMALL_CODE
for (r = 0; ; r++) {
rk += 4;
t0 =
Td0(byte(s0, 3)) ^
Td1(byte(s3, 2)) ^
Td2(byte(s2, 1)) ^
Td3(byte(s1, 0)) ^
rk[0];
t1 =
Td0(byte(s1, 3)) ^
Td1(byte(s0, 2)) ^
Td2(byte(s3, 1)) ^
Td3(byte(s2, 0)) ^
rk[1];
t2 =
Td0(byte(s2, 3)) ^
Td1(byte(s1, 2)) ^
Td2(byte(s0, 1)) ^
Td3(byte(s3, 0)) ^
rk[2];
t3 =
Td0(byte(s3, 3)) ^
Td1(byte(s2, 2)) ^
Td2(byte(s1, 1)) ^
Td3(byte(s0, 0)) ^
rk[3];
if (r == Nr-2) {
break;
}
s0 = t0; s1 = t1; s2 = t2; s3 = t3;
}
rk += 4;
#else
/*
* Nr - 1 full rounds:
*/
r = Nr >> 1;
for (;;) {
t0 =
Td0(byte(s0, 3)) ^
Td1(byte(s3, 2)) ^
Td2(byte(s2, 1)) ^
Td3(byte(s1, 0)) ^
rk[4];
t1 =
Td0(byte(s1, 3)) ^
Td1(byte(s0, 2)) ^
Td2(byte(s3, 1)) ^
Td3(byte(s2, 0)) ^
rk[5];
t2 =
Td0(byte(s2, 3)) ^
Td1(byte(s1, 2)) ^
Td2(byte(s0, 1)) ^
Td3(byte(s3, 0)) ^
rk[6];
t3 =
Td0(byte(s3, 3)) ^
Td1(byte(s2, 2)) ^
Td2(byte(s1, 1)) ^
Td3(byte(s0, 0)) ^
rk[7];
rk += 8;
if (--r == 0) {
break;
}
s0 =
Td0(byte(t0, 3)) ^
Td1(byte(t3, 2)) ^
Td2(byte(t2, 1)) ^
Td3(byte(t1, 0)) ^
rk[0];
s1 =
Td0(byte(t1, 3)) ^
Td1(byte(t0, 2)) ^
Td2(byte(t3, 1)) ^
Td3(byte(t2, 0)) ^
rk[1];
s2 =
Td0(byte(t2, 3)) ^
Td1(byte(t1, 2)) ^
Td2(byte(t0, 1)) ^
Td3(byte(t3, 0)) ^
rk[2];
s3 =
Td0(byte(t3, 3)) ^
Td1(byte(t2, 2)) ^
Td2(byte(t1, 1)) ^
Td3(byte(t0, 0)) ^
rk[3];
}
#endif
/*
* apply last round and
* map cipher state to byte array block:
*/
s0 =
(Td4[byte(t0, 3)] & 0xff000000) ^
(Td4[byte(t3, 2)] & 0x00ff0000) ^
(Td4[byte(t2, 1)] & 0x0000ff00) ^
(Td4[byte(t1, 0)] & 0x000000ff) ^
rk[0];
STORE32H(s0, pt);
s1 =
(Td4[byte(t1, 3)] & 0xff000000) ^
(Td4[byte(t0, 2)] & 0x00ff0000) ^
(Td4[byte(t3, 1)] & 0x0000ff00) ^
(Td4[byte(t2, 0)] & 0x000000ff) ^
rk[1];
STORE32H(s1, pt+4);
s2 =
(Td4[byte(t2, 3)] & 0xff000000) ^
(Td4[byte(t1, 2)] & 0x00ff0000) ^
(Td4[byte(t0, 1)] & 0x0000ff00) ^
(Td4[byte(t3, 0)] & 0x000000ff) ^
rk[2];
STORE32H(s2, pt+8);
s3 =
(Td4[byte(t3, 3)] & 0xff000000) ^
(Td4[byte(t2, 2)] & 0x00ff0000) ^
(Td4[byte(t1, 1)] & 0x0000ff00) ^
(Td4[byte(t0, 0)] & 0x000000ff) ^
rk[3];
STORE32H(s3, pt+12);
return CRYPT_OK;
return mbedtls_aes_crypt_ecb(&ctx_decrypt, MBEDTLS_AES_DECRYPT, ct, pt) == 0 ? CRYPT_OK : CRYPT_ERROR;
}
#ifdef LTC_CLEAN_STACK
int ECB_DEC(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
int ECB_DEC(const unsigned char *ct, unsigned char *pt, symmetric_key *skey)
{
int err = _rijndael_ecb_decrypt(ct, pt, skey);
burn_stack(sizeof(unsigned long)*8 + sizeof(unsigned long*) + sizeof(int)*2);
return err;
return _rijndael_ecb_decrypt(ct, pt, skey);
}
#endif
@ -726,6 +273,10 @@ int ECB_TEST(void)
*/
void ECB_DONE(symmetric_key *skey)
{
mbedtls_aes_free(&ctx_encrypt);
#ifndef ENCRYPT_ONLY
mbedtls_aes_free(&ctx_decrypt);
#endif
}
@ -753,8 +304,3 @@ int ECB_KS(int *keysize)
}
#endif
/* $Source$ */
/* $Revision$ */
/* $Date$ */

1028
aes_tab.h

File diff suppressed because it is too large Load Diff

302
aesce.c Normal file
View File

@ -0,0 +1,302 @@
/*
* Armv8-A Cryptographic Extension support functions for Aarch64
*
* Copyright The Mbed TLS Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && defined(__clang__) && __clang_major__ >= 4
/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
*
* The intrinsic declaration are guarded by predefined ACLE macros in clang:
* these are normally only enabled by the -march option on the command line.
* By defining the macros ourselves we gain access to those declarations without
* requiring -march on the command line.
*
* `arm_neon.h` could be included by any header file, so we put these defines
* at the top of this file, before any includes.
*/
#define __ARM_FEATURE_CRYPTO 1
/* See: https://arm-software.github.io/acle/main/acle.html#cryptographic-extensions
*
* `__ARM_FEATURE_CRYPTO` is deprecated, but we need to continue to specify it
* for older compilers.
*/
#define __ARM_FEATURE_AES 1
#define MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG
#endif
#include <string.h>
#include "aesce.h"
#if defined(MBEDTLS_AESCE_C)
#if defined(MBEDTLS_HAVE_ARM64)
/* Compiler version checks. */
#if defined(__clang__)
# if __clang_major__ < 4
# error "Minimum version of Clang for MBEDTLS_AESCE_C is 4.0."
# endif
#elif defined(__GNUC__)
# if __GNUC__ < 6
# error "Minimum version of GCC for MBEDTLS_AESCE_C is 6.0."
# endif
#elif defined(_MSC_VER)
/* TODO: We haven't verified MSVC from 1920 to 1928. If someone verified that,
* please update this and document of `MBEDTLS_AESCE_C` in
* `mbedtls_config.h`. */
# if _MSC_VER < 1929
# error "Minimum version of MSVC for MBEDTLS_AESCE_C is 2019 version 16.11.2."
# endif
#endif
#if !defined(__ARM_FEATURE_AES) || defined(MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG)
# if defined(__clang__)
# pragma clang attribute push (__attribute__((target("crypto"))), apply_to=function)
# define MBEDTLS_POP_TARGET_PRAGMA
# elif defined(__GNUC__)
# pragma GCC push_options
# pragma GCC target ("arch=armv8-a+crypto")
# define MBEDTLS_POP_TARGET_PRAGMA
# elif defined(_MSC_VER)
# error "Required feature(__ARM_FEATURE_AES) is not enabled."
# endif
#endif /* !__ARM_FEATURE_AES || MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG */
#include <arm_neon.h>
#if defined(__linux__)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#endif
/*
* AES instruction support detection routine
*/
int mbedtls_aesce_has_support(void)
{
#if defined(__linux__)
unsigned long auxval = getauxval(AT_HWCAP);
return (auxval & (HWCAP_ASIMD | HWCAP_AES)) ==
(HWCAP_ASIMD | HWCAP_AES);
#else
/* Assume AES instructions are supported. */
return 1;
#endif
}
static uint8x16_t aesce_encrypt_block(uint8x16_t block,
unsigned char *keys,
int rounds)
{
for (int i = 0; i < rounds - 1; i++) {
/* AES AddRoundKey, SubBytes, ShiftRows (in this order).
* AddRoundKey adds the round key for the previous round. */
block = vaeseq_u8(block, vld1q_u8(keys + i * 16));
/* AES mix columns */
block = vaesmcq_u8(block);
}
/* AES AddRoundKey for the previous round.
* SubBytes, ShiftRows for the final round. */
block = vaeseq_u8(block, vld1q_u8(keys + (rounds -1) * 16));
/* Final round: no MixColumns */
/* Final AddRoundKey */
block = veorq_u8(block, vld1q_u8(keys + rounds * 16));
return block;
}
static uint8x16_t aesce_decrypt_block(uint8x16_t block,
unsigned char *keys,
int rounds)
{
for (int i = 0; i < rounds - 1; i++) {
/* AES AddRoundKey, SubBytes, ShiftRows */
block = vaesdq_u8(block, vld1q_u8(keys + i * 16));
/* AES inverse MixColumns for the next round.
*
* This means that we switch the order of the inverse AddRoundKey and
* inverse MixColumns operations. We have to do this as AddRoundKey is
* done in an atomic instruction together with the inverses of SubBytes
* and ShiftRows.
*
* It works because MixColumns is a linear operation over GF(2^8) and
* AddRoundKey is an exclusive or, which is equivalent to addition over
* GF(2^8). (The inverse of MixColumns needs to be applied to the
* affected round keys separately which has been done when the
* decryption round keys were calculated.) */
block = vaesimcq_u8(block);
}
/* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
* last full round. */
block = vaesdq_u8(block, vld1q_u8(keys + (rounds - 1) * 16));
/* Inverse AddRoundKey for inverting the initial round key addition. */
block = veorq_u8(block, vld1q_u8(keys + rounds * 16));
return block;
}
/*
* AES-ECB block en(de)cryption
*/
int mbedtls_aesce_crypt_ecb(mbedtls_aes_context *ctx,
int mode,
const unsigned char input[16],
unsigned char output[16])
{
uint8x16_t block = vld1q_u8(&input[0]);
unsigned char *keys = (unsigned char *) (ctx->buf + ctx->rk_offset);
if (mode == MBEDTLS_AES_ENCRYPT) {
block = aesce_encrypt_block(block, keys, ctx->nr);
} else {
block = aesce_decrypt_block(block, keys, ctx->nr);
}
vst1q_u8(&output[0], block);
return 0;
}
/*
* Compute decryption round keys from encryption round keys
*/
void mbedtls_aesce_inverse_key(unsigned char *invkey,
const unsigned char *fwdkey,
int nr)
{
int i, j;
j = nr;
vst1q_u8(invkey, vld1q_u8(fwdkey + j * 16));
for (i = 1, j--; j > 0; i++, j--) {
vst1q_u8(invkey + i * 16,
vaesimcq_u8(vld1q_u8(fwdkey + j * 16)));
}
vst1q_u8(invkey + i * 16, vld1q_u8(fwdkey + j * 16));
}
static inline uint32_t aes_rot_word(uint32_t word)
{
return (word << (32 - 8)) | (word >> 8);
}
static inline uint32_t aes_sub_word(uint32_t in)
{
uint8x16_t v = vreinterpretq_u8_u32(vdupq_n_u32(in));
uint8x16_t zero = vdupq_n_u8(0);
/* vaeseq_u8 does both SubBytes and ShiftRows. Taking the first row yields
* the correct result as ShiftRows doesn't change the first row. */
v = vaeseq_u8(zero, v);
return vgetq_lane_u32(vreinterpretq_u32_u8(v), 0);
}
/*
* Key expansion function
*/
static void aesce_setkey_enc(unsigned char *rk,
const unsigned char *key,
const size_t key_bit_length)
{
static uint8_t const rcon[] = { 0x01, 0x02, 0x04, 0x08, 0x10,
0x20, 0x40, 0x80, 0x1b, 0x36 };
/* See https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197.pdf
* - Section 5, Nr = Nk + 6
* - Section 5.2, the length of round keys is Nb*(Nr+1)
*/
const uint32_t key_len_in_words = key_bit_length / 32; /* Nk */
const size_t round_key_len_in_words = 4; /* Nb */
const size_t rounds_needed = key_len_in_words + 6; /* Nr */
const size_t round_keys_len_in_words =
round_key_len_in_words * (rounds_needed + 1); /* Nb*(Nr+1) */
const uint32_t *rko_end = (uint32_t *) rk + round_keys_len_in_words;
memcpy(rk, key, key_len_in_words * 4);
for (uint32_t *rki = (uint32_t *) rk;
rki + key_len_in_words < rko_end;
rki += key_len_in_words) {
size_t iteration = (rki - (uint32_t *) rk) / key_len_in_words;
uint32_t *rko;
rko = rki + key_len_in_words;
rko[0] = aes_rot_word(aes_sub_word(rki[key_len_in_words - 1]));
rko[0] ^= rcon[iteration] ^ rki[0];
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
if (rko + key_len_in_words > rko_end) {
/* Do not write overflow words.*/
continue;
}
switch (key_bit_length) {
case 128:
break;
case 192:
rko[4] = rko[3] ^ rki[4];
rko[5] = rko[4] ^ rki[5];
break;
case 256:
rko[4] = aes_sub_word(rko[3]) ^ rki[4];
rko[5] = rko[4] ^ rki[5];
rko[6] = rko[5] ^ rki[6];
rko[7] = rko[6] ^ rki[7];
break;
}
}
}
/*
* Key expansion, wrapper
*/
int mbedtls_aesce_setkey_enc(unsigned char *rk,
const unsigned char *key,
size_t bits)
{
switch (bits) {
case 128:
case 192:
case 256:
aesce_setkey_enc(rk, key, bits);
break;
default:
return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
return 0;
}
#if defined(MBEDTLS_POP_TARGET_PRAGMA)
#if defined(__clang__)
#pragma clang attribute pop
#elif defined(__GNUC__)
#pragma GCC pop_options
#endif
#undef MBEDTLS_POP_TARGET_PRAGMA
#endif
#endif /* MBEDTLS_HAVE_ARM64 */
#endif /* MBEDTLS_AESCE_C */

90
aesce.h Normal file
View File

@ -0,0 +1,90 @@
/**
* \file aesce.h
*
* \brief Support hardware AES acceleration on Armv8-A processors with
* the Armv8-A Cryptographic Extension in AArch64 execution state.
*
* \warning These functions are only for internal use by other library
* functions; you must not call them directly.
*/
/*
* Copyright The Mbed TLS Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MBEDTLS_AESCE_H
#define MBEDTLS_AESCE_H
#include "mbtls_aes.h"
#if defined(MBEDTLS_HAVE_ARM64)
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief Internal function to detect the crypto extension in CPUs.
*
* \return 1 if CPU has support for the feature, 0 otherwise
*/
int mbedtls_aesce_has_support(void);
/**
* \brief Internal AES-ECB block encryption and decryption
*
* \param ctx AES context
* \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
* \param input 16-byte input block
* \param output 16-byte output block
*
* \return 0 on success (cannot fail)
*/
int mbedtls_aesce_crypt_ecb(mbedtls_aes_context *ctx,
int mode,
const unsigned char input[16],
unsigned char output[16]);
/**
* \brief Internal round key inversion. This function computes
* decryption round keys from the encryption round keys.
*
* \param invkey Round keys for the equivalent inverse cipher
* \param fwdkey Original round keys (for encryption)
* \param nr Number of rounds (that is, number of round keys minus one)
*/
void mbedtls_aesce_inverse_key(unsigned char *invkey,
const unsigned char *fwdkey,
int nr);
/**
* \brief Internal key expansion for encryption
*
* \param rk Destination buffer where the round keys are written
* \param key Encryption key
* \param bits Key size in bits (must be 128, 192 or 256)
*
* \return 0 if successful, or MBEDTLS_ERR_AES_INVALID_KEY_LENGTH
*/
int mbedtls_aesce_setkey_enc(unsigned char *rk,
const unsigned char *key,
size_t bits);
#ifdef __cplusplus
}
#endif
#endif /* MBEDTLS_HAVE_ARM64 */
#endif /* MBEDTLS_AESCE_H */

331
aesni.c Normal file
View File

@ -0,0 +1,331 @@
/*
* AES-NI support functions
*
* Copyright The Mbed TLS Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* [AES-WP] https://www.intel.com/content/www/us/en/developer/articles/tool/intel-advanced-encryption-standard-aes-instructions-set.html
* [CLMUL-WP] https://www.intel.com/content/www/us/en/develop/download/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode.html
*/
#include "aesni.h"
#if defined(MBEDTLS_AESNI_C)
#include <string.h>
#if defined(MBEDTLS_AESNI_HAVE_CODE)
/*
* AES-NI support detection routine
*/
int mbedtls_aesni_has_support(unsigned int what)
{
static int done = 0;
static unsigned int c = 0;
if (!done) {
/* AESNI using asm */
asm ("movl $1, %%eax \n\t"
"cpuid \n\t"
: "=c" (c)
:
: "eax", "ebx", "edx");
done = 1;
}
return (c & what) != 0;
}
/*
* Binutils needs to be at least 2.19 to support AES-NI instructions.
* Unfortunately, a lot of users have a lower version now (2014-04).
* Emit bytecode directly in order to support "old" version of gas.
*
* Opcodes from the Intel architecture reference manual, vol. 3.
* We always use registers, so we don't need prefixes for memory operands.
* Operand macros are in gas order (src, dst) as opposed to Intel order
* (dst, src) in order to blend better into the surrounding assembly code.
*/
#define AESDEC(regs) ".byte 0x66,0x0F,0x38,0xDE," regs "\n\t"
#define AESDECLAST(regs) ".byte 0x66,0x0F,0x38,0xDF," regs "\n\t"
#define AESENC(regs) ".byte 0x66,0x0F,0x38,0xDC," regs "\n\t"
#define AESENCLAST(regs) ".byte 0x66,0x0F,0x38,0xDD," regs "\n\t"
#define AESIMC(regs) ".byte 0x66,0x0F,0x38,0xDB," regs "\n\t"
#define AESKEYGENA(regs, imm) ".byte 0x66,0x0F,0x3A,0xDF," regs "," imm "\n\t"
#define PCLMULQDQ(regs, imm) ".byte 0x66,0x0F,0x3A,0x44," regs "," imm "\n\t"
#define xmm0_xmm0 "0xC0"
#define xmm0_xmm1 "0xC8"
#define xmm0_xmm2 "0xD0"
#define xmm0_xmm3 "0xD8"
#define xmm0_xmm4 "0xE0"
#define xmm1_xmm0 "0xC1"
#define xmm1_xmm2 "0xD1"
/*
* AES-NI AES-ECB block en(de)cryption
*/
int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx, int mode, const unsigned char input[16], unsigned char output[16])
{
asm ("movdqu (%3), %%xmm0 \n\t" // load input
"movdqu (%1), %%xmm1 \n\t" // load round key 0
"pxor %%xmm1, %%xmm0 \n\t" // round 0
"add $16, %1 \n\t" // point to next round key
"subl $1, %0 \n\t" // normal rounds = nr - 1
"test %2, %2 \n\t" // mode?
"jz 2f \n\t" // 0 = decrypt
"1: \n\t" // encryption loop
"movdqu (%1), %%xmm1 \n\t" // load round key
AESENC(xmm1_xmm0) // do round
"add $16, %1 \n\t" // point to next round key
"subl $1, %0 \n\t" // loop
"jnz 1b \n\t"
"movdqu (%1), %%xmm1 \n\t" // load round key
AESENCLAST(xmm1_xmm0) // last round
"jmp 3f \n\t"
"2: \n\t" // decryption loop
"movdqu (%1), %%xmm1 \n\t"
AESDEC(xmm1_xmm0) // do round
"add $16, %1 \n\t"
"subl $1, %0 \n\t"
"jnz 2b \n\t"
"movdqu (%1), %%xmm1 \n\t" // load round key
AESDECLAST(xmm1_xmm0) // last round
"3: \n\t"
"movdqu %%xmm0, (%4) \n\t" // export output
:
: "r" (ctx->nr), "r" (ctx->buf + ctx->rk_offset), "r" (mode), "r" (input), "r" (output)
: "memory", "cc", "xmm0", "xmm1");
return 0;
}
/*
* Compute decryption round keys from encryption round keys
*/
void mbedtls_aesni_inverse_key(unsigned char *invkey, const unsigned char *fwdkey, int nr)
{
unsigned char *ik = invkey;
const unsigned char *fk = fwdkey + 16 * nr;
memcpy(ik, fk, 16);
for (fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16) {
asm ("movdqu (%0), %%xmm0 \n\t"
AESIMC(xmm0_xmm0)
"movdqu %%xmm0, (%1) \n\t"
:
: "r" (fk), "r" (ik)
: "memory", "xmm0");
}
memcpy(ik, fk, 16);
}
/*
* Key expansion, 128-bit case
*/
static void aesni_setkey_enc_128(unsigned char *rk, const unsigned char *key)
{
asm ("movdqu (%1), %%xmm0 \n\t" // copy the original key
"movdqu %%xmm0, (%0) \n\t" // as round key 0
"jmp 2f \n\t" // skip auxiliary routine
/*
* Finish generating the next round key.
*
* On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
* with X = rot( sub( r3 ) ) ^ RCON.
*
* On exit, xmm0 is r7:r6:r5:r4
* with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
* and those are written to the round key buffer.
*/
"1: \n\t"
"pshufd $0xff, %%xmm1, %%xmm1 \n\t" // X:X:X:X
"pxor %%xmm0, %%xmm1 \n\t" // X+r3:X+r2:X+r1:r4
"pslldq $4, %%xmm0 \n\t" // r2:r1:r0:0
"pxor %%xmm0, %%xmm1 \n\t" // X+r3+r2:X+r2+r1:r5:r4
"pslldq $4, %%xmm0 \n\t" // etc
"pxor %%xmm0, %%xmm1 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm1, %%xmm0 \n\t" // update xmm0 for next time!
"add $16, %0 \n\t" // point to next round key
"movdqu %%xmm0, (%0) \n\t" // write it
"ret \n\t"
/* Main "loop" */
"2: \n\t"
AESKEYGENA(xmm0_xmm1, "0x01") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x02") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x04") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x08") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x10") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x20") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x40") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x80") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x1B") "call 1b \n\t"
AESKEYGENA(xmm0_xmm1, "0x36") "call 1b \n\t"
:
: "r" (rk), "r" (key)
: "memory", "cc", "0");
}
/*
* Key expansion, 192-bit case
*/
static void aesni_setkey_enc_192(unsigned char *rk, const unsigned char *key)
{
asm ("movdqu (%1), %%xmm0 \n\t" // copy original round key
"movdqu %%xmm0, (%0) \n\t"
"add $16, %0 \n\t"
"movq 16(%1), %%xmm1 \n\t"
"movq %%xmm1, (%0) \n\t"
"add $8, %0 \n\t"
"jmp 2f \n\t" // skip auxiliary routine
/*
* Finish generating the next 6 quarter-keys.
*
* On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
* and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
*
* On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
* and those are written to the round key buffer.
*/
"1: \n\t"
"pshufd $0x55, %%xmm2, %%xmm2 \n\t" // X:X:X:X
"pxor %%xmm0, %%xmm2 \n\t" // X+r3:X+r2:X+r1:r4
"pslldq $4, %%xmm0 \n\t" // etc
"pxor %%xmm0, %%xmm2 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm2, %%xmm0 \n\t" // update xmm0 = r9:r8:r7:r6
"movdqu %%xmm0, (%0) \n\t"
"add $16, %0 \n\t"
"pshufd $0xff, %%xmm0, %%xmm2 \n\t" // r9:r9:r9:r9
"pxor %%xmm1, %%xmm2 \n\t" // stuff:stuff:r9+r5:r10
"pslldq $4, %%xmm1 \n\t" // r2:r1:r0:0
"pxor %%xmm2, %%xmm1 \n\t" // xmm1 = stuff:stuff:r11:r10
"movq %%xmm1, (%0) \n\t"
"add $8, %0 \n\t"
"ret \n\t"
"2: \n\t"
AESKEYGENA(xmm1_xmm2, "0x01") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x02") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x04") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x08") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x10") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x20") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x40") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x80") "call 1b \n\t"
:
: "r" (rk), "r" (key)
: "memory", "cc", "0");
}
/*
* Key expansion, 256-bit case
*/
static void aesni_setkey_enc_256(unsigned char *rk, const unsigned char *key)
{
asm ("movdqu (%1), %%xmm0 \n\t"
"movdqu %%xmm0, (%0) \n\t"
"add $16, %0 \n\t"
"movdqu 16(%1), %%xmm1 \n\t"
"movdqu %%xmm1, (%0) \n\t"
"jmp 2f \n\t" // skip auxiliary routine
/*
* Finish generating the next two round keys.
*
* On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
* xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
*
* On exit, xmm0 is r11:r10:r9:r8 and xmm1 is r15:r14:r13:r12
* and those have been written to the output buffer.
*/
"1: \n\t"
"pshufd $0xff, %%xmm2, %%xmm2 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm0, %%xmm2 \n\t"
"pslldq $4, %%xmm0 \n\t"
"pxor %%xmm2, %%xmm0 \n\t"
"add $16, %0 \n\t"
"movdqu %%xmm0, (%0) \n\t"
/* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
* and proceed to generate next round key from there */
AESKEYGENA(xmm0_xmm2, "0x00")
"pshufd $0xaa, %%xmm2, %%xmm2 \n\t"
"pxor %%xmm1, %%xmm2 \n\t"
"pslldq $4, %%xmm1 \n\t"
"pxor %%xmm1, %%xmm2 \n\t"
"pslldq $4, %%xmm1 \n\t"
"pxor %%xmm1, %%xmm2 \n\t"
"pslldq $4, %%xmm1 \n\t"
"pxor %%xmm2, %%xmm1 \n\t"
"add $16, %0 \n\t"
"movdqu %%xmm1, (%0) \n\t"
"ret \n\t"
/*
* Main "loop" - Generating one more key than necessary,
* see definition of mbedtls_aes_context.buf
*/
"2: \n\t"
AESKEYGENA(xmm1_xmm2, "0x01") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x02") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x04") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x08") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x10") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x20") "call 1b \n\t"
AESKEYGENA(xmm1_xmm2, "0x40") "call 1b \n\t"
:
: "r" (rk), "r" (key)
: "memory", "cc", "0");
}
/*
* Key expansion, wrapper
*/
int mbedtls_aesni_setkey_enc(unsigned char *rk, const unsigned char *key, size_t bits)
{
switch (bits) {
case 128: aesni_setkey_enc_128(rk, key); break;
case 192: aesni_setkey_enc_192(rk, key); break;
case 256: aesni_setkey_enc_256(rk, key); break;
default: return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
return 0;
}
#endif /* MBEDTLS_AESNI_HAVE_CODE */
#endif /* MBEDTLS_AESNI_C */

138
aesni.h Normal file
View File

@ -0,0 +1,138 @@
/**
* \file aesni.h
*
* \brief AES-NI for hardware AES acceleration on some Intel processors
*
* \warning These functions are only for internal use by other library
* functions; you must not call them directly.
*/
/*
* Copyright The Mbed TLS Contributors
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MBEDTLS_AESNI_H
#define MBEDTLS_AESNI_H
#include "mbtls_aes.h"
#define MBEDTLS_AESNI_AES 0x02000000u
#define MBEDTLS_AESNI_CLMUL 0x00000002u
#if defined(MBEDTLS_AESNI_C)
/* Can we do AESNI with intrinsics?
* (Only implemented with certain compilers, only for certain targets.)
*/
#undef MBEDTLS_AESNI_HAVE_INTRINSICS
#if defined(_MSC_VER)
/* Visual Studio supports AESNI intrinsics since VS 2008 SP1. We only support
* VS 2013 and up for other reasons anyway, so no need to check the version. */
#define MBEDTLS_AESNI_HAVE_INTRINSICS
#endif
/* GCC-like compilers: currently, we only support intrinsics if the requisite
* target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2`
* or `clang -maes -mpclmul`). */
#if defined(__GNUC__) && defined(__AES__) && defined(__PCLMUL__)
#define MBEDTLS_AESNI_HAVE_INTRINSICS
#endif
/* Choose the implementation of AESNI, if one is available. */
#undef MBEDTLS_AESNI_HAVE_CODE
/* To minimize disruption when releasing the intrinsics-based implementation,
* favor the assembly-based implementation if it's available. We intend to
* revise this in a later release of Mbed TLS 3.x. In the long run, we will
* likely remove the assembly implementation. */
#if defined(MBEDTLS_HAVE_X86_64)
#define MBEDTLS_AESNI_HAVE_CODE 1 // via assembly
#elif defined(MBEDTLS_AESNI_HAVE_INTRINSICS)
#define MBEDTLS_AESNI_HAVE_CODE 2 // via intrinsics
#endif
#if defined(MBEDTLS_AESNI_HAVE_CODE)
#ifdef __cplusplus
extern "C" {
#endif
/**
* \brief Internal function to detect the AES-NI feature in CPUs.
*
* \note This function is only for internal use by other library
* functions; you must not call it directly.
*
* \param what The feature to detect
* (MBEDTLS_AESNI_AES or MBEDTLS_AESNI_CLMUL)
*
* \return 1 if CPU has support for the feature, 0 otherwise
*/
int mbedtls_aesni_has_support(unsigned int what);
/**
* \brief Internal AES-NI AES-ECB block encryption and decryption
*
* \note This function is only for internal use by other library
* functions; you must not call it directly.
*
* \param ctx AES context
* \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
* \param input 16-byte input block
* \param output 16-byte output block
*
* \return 0 on success (cannot fail)
*/
int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx,
int mode,
const unsigned char input[16],
unsigned char output[16]);
/**
* \brief Internal round key inversion. This function computes
* decryption round keys from the encryption round keys.
*
* \note This function is only for internal use by other library
* functions; you must not call it directly.
*
* \param invkey Round keys for the equivalent inverse cipher
* \param fwdkey Original round keys (for encryption)
* \param nr Number of rounds (that is, number of round keys minus one)
*/
void mbedtls_aesni_inverse_key(unsigned char *invkey,
const unsigned char *fwdkey,
int nr);
/**
* \brief Internal key expansion for encryption
*
* \note This function is only for internal use by other library
* functions; you must not call it directly.
*
* \param rk Destination buffer where the round keys are written
* \param key Encryption key
* \param bits Key size in bits (must be 128, 192 or 256)
*
* \return 0 if successful, or MBEDTLS_ERR_AES_INVALID_KEY_LENGTH
*/
int mbedtls_aesni_setkey_enc(unsigned char *rk,
const unsigned char *key,
size_t bits);
#ifdef __cplusplus
}
#endif
#endif /* MBEDTLS_AESNI_HAVE_CODE */
#endif /* MBEDTLS_AESNI_C */
#endif /* MBEDTLS_AESNI_H */

713
mbtls_aes.c Normal file
View File

@ -0,0 +1,713 @@
#include "mbtls_aes.h"
#if defined(MBEDTLS_AESNI_C)
#include "aesni.h"
#endif
#if defined(MBEDTLS_AESCE_C)
#include "aesce.h"
#endif
/** Byte Reading Macros
*
* Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th
* byte from x, where byte 0 is the least significant byte.
*/
#define MBEDTLS_BYTE_0(x) ((uint8_t) ((x) & 0xff))
#define MBEDTLS_BYTE_1(x) ((uint8_t) (((x) >> 8) & 0xff))
#define MBEDTLS_BYTE_2(x) ((uint8_t) (((x) >> 16) & 0xff))
#define MBEDTLS_BYTE_3(x) ((uint8_t) (((x) >> 24) & 0xff))
#define MBEDTLS_BYTE_4(x) ((uint8_t) (((x) >> 32) & 0xff))
#define MBEDTLS_BYTE_5(x) ((uint8_t) (((x) >> 40) & 0xff))
#define MBEDTLS_BYTE_6(x) ((uint8_t) (((x) >> 48) & 0xff))
#define MBEDTLS_BYTE_7(x) ((uint8_t) (((x) >> 56) & 0xff))
static void *(*const volatile memset_func)(void *, int, size_t) = memset;
void mbedtls_platform_zeroize(void *buf, size_t len)
{
if (buf && len > 0) {
memset_func(buf, 0, len);
}
}
/*
* Forward S-box
*/
static const unsigned char FSb[256] = {
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};
/*
* Forward tables
*/
#define FT \
\
V(A5, 63, 63, C6), V(84, 7C, 7C, F8), V(99, 77, 77, EE), V(8D, 7B, 7B, F6), \
V(0D, F2, F2, FF), V(BD, 6B, 6B, D6), V(B1, 6F, 6F, DE), V(54, C5, C5, 91), \
V(50, 30, 30, 60), V(03, 01, 01, 02), V(A9, 67, 67, CE), V(7D, 2B, 2B, 56), \
V(19, FE, FE, E7), V(62, D7, D7, B5), V(E6, AB, AB, 4D), V(9A, 76, 76, EC), \
V(45, CA, CA, 8F), V(9D, 82, 82, 1F), V(40, C9, C9, 89), V(87, 7D, 7D, FA), \
V(15, FA, FA, EF), V(EB, 59, 59, B2), V(C9, 47, 47, 8E), V(0B, F0, F0, FB), \
V(EC, AD, AD, 41), V(67, D4, D4, B3), V(FD, A2, A2, 5F), V(EA, AF, AF, 45), \
V(BF, 9C, 9C, 23), V(F7, A4, A4, 53), V(96, 72, 72, E4), V(5B, C0, C0, 9B), \
V(C2, B7, B7, 75), V(1C, FD, FD, E1), V(AE, 93, 93, 3D), V(6A, 26, 26, 4C), \
V(5A, 36, 36, 6C), V(41, 3F, 3F, 7E), V(02, F7, F7, F5), V(4F, CC, CC, 83), \
V(5C, 34, 34, 68), V(F4, A5, A5, 51), V(34, E5, E5, D1), V(08, F1, F1, F9), \
V(93, 71, 71, E2), V(73, D8, D8, AB), V(53, 31, 31, 62), V(3F, 15, 15, 2A), \
V(0C, 04, 04, 08), V(52, C7, C7, 95), V(65, 23, 23, 46), V(5E, C3, C3, 9D), \
V(28, 18, 18, 30), V(A1, 96, 96, 37), V(0F, 05, 05, 0A), V(B5, 9A, 9A, 2F), \
V(09, 07, 07, 0E), V(36, 12, 12, 24), V(9B, 80, 80, 1B), V(3D, E2, E2, DF), \
V(26, EB, EB, CD), V(69, 27, 27, 4E), V(CD, B2, B2, 7F), V(9F, 75, 75, EA), \
V(1B, 09, 09, 12), V(9E, 83, 83, 1D), V(74, 2C, 2C, 58), V(2E, 1A, 1A, 34), \
V(2D, 1B, 1B, 36), V(B2, 6E, 6E, DC), V(EE, 5A, 5A, B4), V(FB, A0, A0, 5B), \
V(F6, 52, 52, A4), V(4D, 3B, 3B, 76), V(61, D6, D6, B7), V(CE, B3, B3, 7D), \
V(7B, 29, 29, 52), V(3E, E3, E3, DD), V(71, 2F, 2F, 5E), V(97, 84, 84, 13), \
V(F5, 53, 53, A6), V(68, D1, D1, B9), V(00, 00, 00, 00), V(2C, ED, ED, C1), \
V(60, 20, 20, 40), V(1F, FC, FC, E3), V(C8, B1, B1, 79), V(ED, 5B, 5B, B6), \
V(BE, 6A, 6A, D4), V(46, CB, CB, 8D), V(D9, BE, BE, 67), V(4B, 39, 39, 72), \
V(DE, 4A, 4A, 94), V(D4, 4C, 4C, 98), V(E8, 58, 58, B0), V(4A, CF, CF, 85), \
V(6B, D0, D0, BB), V(2A, EF, EF, C5), V(E5, AA, AA, 4F), V(16, FB, FB, ED), \
V(C5, 43, 43, 86), V(D7, 4D, 4D, 9A), V(55, 33, 33, 66), V(94, 85, 85, 11), \
V(CF, 45, 45, 8A), V(10, F9, F9, E9), V(06, 02, 02, 04), V(81, 7F, 7F, FE), \
V(F0, 50, 50, A0), V(44, 3C, 3C, 78), V(BA, 9F, 9F, 25), V(E3, A8, A8, 4B), \
V(F3, 51, 51, A2), V(FE, A3, A3, 5D), V(C0, 40, 40, 80), V(8A, 8F, 8F, 05), \
V(AD, 92, 92, 3F), V(BC, 9D, 9D, 21), V(48, 38, 38, 70), V(04, F5, F5, F1), \
V(DF, BC, BC, 63), V(C1, B6, B6, 77), V(75, DA, DA, AF), V(63, 21, 21, 42), \
V(30, 10, 10, 20), V(1A, FF, FF, E5), V(0E, F3, F3, FD), V(6D, D2, D2, BF), \
V(4C, CD, CD, 81), V(14, 0C, 0C, 18), V(35, 13, 13, 26), V(2F, EC, EC, C3), \
V(E1, 5F, 5F, BE), V(A2, 97, 97, 35), V(CC, 44, 44, 88), V(39, 17, 17, 2E), \
V(57, C4, C4, 93), V(F2, A7, A7, 55), V(82, 7E, 7E, FC), V(47, 3D, 3D, 7A), \
V(AC, 64, 64, C8), V(E7, 5D, 5D, BA), V(2B, 19, 19, 32), V(95, 73, 73, E6), \
V(A0, 60, 60, C0), V(98, 81, 81, 19), V(D1, 4F, 4F, 9E), V(7F, DC, DC, A3), \
V(66, 22, 22, 44), V(7E, 2A, 2A, 54), V(AB, 90, 90, 3B), V(83, 88, 88, 0B), \
V(CA, 46, 46, 8C), V(29, EE, EE, C7), V(D3, B8, B8, 6B), V(3C, 14, 14, 28), \
V(79, DE, DE, A7), V(E2, 5E, 5E, BC), V(1D, 0B, 0B, 16), V(76, DB, DB, AD), \
V(3B, E0, E0, DB), V(56, 32, 32, 64), V(4E, 3A, 3A, 74), V(1E, 0A, 0A, 14), \
V(DB, 49, 49, 92), V(0A, 06, 06, 0C), V(6C, 24, 24, 48), V(E4, 5C, 5C, B8), \
V(5D, C2, C2, 9F), V(6E, D3, D3, BD), V(EF, AC, AC, 43), V(A6, 62, 62, C4), \
V(A8, 91, 91, 39), V(A4, 95, 95, 31), V(37, E4, E4, D3), V(8B, 79, 79, F2), \
V(32, E7, E7, D5), V(43, C8, C8, 8B), V(59, 37, 37, 6E), V(B7, 6D, 6D, DA), \
V(8C, 8D, 8D, 01), V(64, D5, D5, B1), V(D2, 4E, 4E, 9C), V(E0, A9, A9, 49), \
V(B4, 6C, 6C, D8), V(FA, 56, 56, AC), V(07, F4, F4, F3), V(25, EA, EA, CF), \
V(AF, 65, 65, CA), V(8E, 7A, 7A, F4), V(E9, AE, AE, 47), V(18, 08, 08, 10), \
V(D5, BA, BA, 6F), V(88, 78, 78, F0), V(6F, 25, 25, 4A), V(72, 2E, 2E, 5C), \
V(24, 1C, 1C, 38), V(F1, A6, A6, 57), V(C7, B4, B4, 73), V(51, C6, C6, 97), \
V(23, E8, E8, CB), V(7C, DD, DD, A1), V(9C, 74, 74, E8), V(21, 1F, 1F, 3E), \
V(DD, 4B, 4B, 96), V(DC, BD, BD, 61), V(86, 8B, 8B, 0D), V(85, 8A, 8A, 0F), \
V(90, 70, 70, E0), V(42, 3E, 3E, 7C), V(C4, B5, B5, 71), V(AA, 66, 66, CC), \
V(D8, 48, 48, 90), V(05, 03, 03, 06), V(01, F6, F6, F7), V(12, 0E, 0E, 1C), \
V(A3, 61, 61, C2), V(5F, 35, 35, 6A), V(F9, 57, 57, AE), V(D0, B9, B9, 69), \
V(91, 86, 86, 17), V(58, C1, C1, 99), V(27, 1D, 1D, 3A), V(B9, 9E, 9E, 27), \
V(38, E1, E1, D9), V(13, F8, F8, EB), V(B3, 98, 98, 2B), V(33, 11, 11, 22), \
V(BB, 69, 69, D2), V(70, D9, D9, A9), V(89, 8E, 8E, 07), V(A7, 94, 94, 33), \
V(B6, 9B, 9B, 2D), V(22, 1E, 1E, 3C), V(92, 87, 87, 15), V(20, E9, E9, C9), \
V(49, CE, CE, 87), V(FF, 55, 55, AA), V(78, 28, 28, 50), V(7A, DF, DF, A5), \
V(8F, 8C, 8C, 03), V(F8, A1, A1, 59), V(80, 89, 89, 09), V(17, 0D, 0D, 1A), \
V(DA, BF, BF, 65), V(31, E6, E6, D7), V(C6, 42, 42, 84), V(B8, 68, 68, D0), \
V(C3, 41, 41, 82), V(B0, 99, 99, 29), V(77, 2D, 2D, 5A), V(11, 0F, 0F, 1E), \
V(CB, B0, B0, 7B), V(FC, 54, 54, A8), V(D6, BB, BB, 6D), V(3A, 16, 16, 2C)
#define V(a, b, c, d) 0x##a##b##c##d
static const uint32_t FT0[256] = { FT };
#undef V
#define V(a, b, c, d) 0x##b##c##d##a
static const uint32_t FT1[256] = { FT };
#undef V
#define V(a, b, c, d) 0x##c##d##a##b
static const uint32_t FT2[256] = { FT };
#undef V
#define V(a, b, c, d) 0x##d##a##b##c
static const uint32_t FT3[256] = { FT };
#undef V
#undef FT
/*
* Reverse S-box
*/
static const unsigned char RSb[256] = {
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};
/*
* Reverse tables
*/
#define RT \
\
V(50, A7, F4, 51), V(53, 65, 41, 7E), V(C3, A4, 17, 1A), V(96, 5E, 27, 3A), \
V(CB, 6B, AB, 3B), V(F1, 45, 9D, 1F), V(AB, 58, FA, AC), V(93, 03, E3, 4B), \
V(55, FA, 30, 20), V(F6, 6D, 76, AD), V(91, 76, CC, 88), V(25, 4C, 02, F5), \
V(FC, D7, E5, 4F), V(D7, CB, 2A, C5), V(80, 44, 35, 26), V(8F, A3, 62, B5), \
V(49, 5A, B1, DE), V(67, 1B, BA, 25), V(98, 0E, EA, 45), V(E1, C0, FE, 5D), \
V(02, 75, 2F, C3), V(12, F0, 4C, 81), V(A3, 97, 46, 8D), V(C6, F9, D3, 6B), \
V(E7, 5F, 8F, 03), V(95, 9C, 92, 15), V(EB, 7A, 6D, BF), V(DA, 59, 52, 95), \
V(2D, 83, BE, D4), V(D3, 21, 74, 58), V(29, 69, E0, 49), V(44, C8, C9, 8E), \
V(6A, 89, C2, 75), V(78, 79, 8E, F4), V(6B, 3E, 58, 99), V(DD, 71, B9, 27), \
V(B6, 4F, E1, BE), V(17, AD, 88, F0), V(66, AC, 20, C9), V(B4, 3A, CE, 7D), \
V(18, 4A, DF, 63), V(82, 31, 1A, E5), V(60, 33, 51, 97), V(45, 7F, 53, 62), \
V(E0, 77, 64, B1), V(84, AE, 6B, BB), V(1C, A0, 81, FE), V(94, 2B, 08, F9), \
V(58, 68, 48, 70), V(19, FD, 45, 8F), V(87, 6C, DE, 94), V(B7, F8, 7B, 52), \
V(23, D3, 73, AB), V(E2, 02, 4B, 72), V(57, 8F, 1F, E3), V(2A, AB, 55, 66), \
V(07, 28, EB, B2), V(03, C2, B5, 2F), V(9A, 7B, C5, 86), V(A5, 08, 37, D3), \
V(F2, 87, 28, 30), V(B2, A5, BF, 23), V(BA, 6A, 03, 02), V(5C, 82, 16, ED), \
V(2B, 1C, CF, 8A), V(92, B4, 79, A7), V(F0, F2, 07, F3), V(A1, E2, 69, 4E), \
V(CD, F4, DA, 65), V(D5, BE, 05, 06), V(1F, 62, 34, D1), V(8A, FE, A6, C4), \
V(9D, 53, 2E, 34), V(A0, 55, F3, A2), V(32, E1, 8A, 05), V(75, EB, F6, A4), \
V(39, EC, 83, 0B), V(AA, EF, 60, 40), V(06, 9F, 71, 5E), V(51, 10, 6E, BD), \
V(F9, 8A, 21, 3E), V(3D, 06, DD, 96), V(AE, 05, 3E, DD), V(46, BD, E6, 4D), \
V(B5, 8D, 54, 91), V(05, 5D, C4, 71), V(6F, D4, 06, 04), V(FF, 15, 50, 60), \
V(24, FB, 98, 19), V(97, E9, BD, D6), V(CC, 43, 40, 89), V(77, 9E, D9, 67), \
V(BD, 42, E8, B0), V(88, 8B, 89, 07), V(38, 5B, 19, E7), V(DB, EE, C8, 79), \
V(47, 0A, 7C, A1), V(E9, 0F, 42, 7C), V(C9, 1E, 84, F8), V(00, 00, 00, 00), \
V(83, 86, 80, 09), V(48, ED, 2B, 32), V(AC, 70, 11, 1E), V(4E, 72, 5A, 6C), \
V(FB, FF, 0E, FD), V(56, 38, 85, 0F), V(1E, D5, AE, 3D), V(27, 39, 2D, 36), \
V(64, D9, 0F, 0A), V(21, A6, 5C, 68), V(D1, 54, 5B, 9B), V(3A, 2E, 36, 24), \
V(B1, 67, 0A, 0C), V(0F, E7, 57, 93), V(D2, 96, EE, B4), V(9E, 91, 9B, 1B), \
V(4F, C5, C0, 80), V(A2, 20, DC, 61), V(69, 4B, 77, 5A), V(16, 1A, 12, 1C), \
V(0A, BA, 93, E2), V(E5, 2A, A0, C0), V(43, E0, 22, 3C), V(1D, 17, 1B, 12), \
V(0B, 0D, 09, 0E), V(AD, C7, 8B, F2), V(B9, A8, B6, 2D), V(C8, A9, 1E, 14), \
V(85, 19, F1, 57), V(4C, 07, 75, AF), V(BB, DD, 99, EE), V(FD, 60, 7F, A3), \
V(9F, 26, 01, F7), V(BC, F5, 72, 5C), V(C5, 3B, 66, 44), V(34, 7E, FB, 5B), \
V(76, 29, 43, 8B), V(DC, C6, 23, CB), V(68, FC, ED, B6), V(63, F1, E4, B8), \
V(CA, DC, 31, D7), V(10, 85, 63, 42), V(40, 22, 97, 13), V(20, 11, C6, 84), \
V(7D, 24, 4A, 85), V(F8, 3D, BB, D2), V(11, 32, F9, AE), V(6D, A1, 29, C7), \
V(4B, 2F, 9E, 1D), V(F3, 30, B2, DC), V(EC, 52, 86, 0D), V(D0, E3, C1, 77), \
V(6C, 16, B3, 2B), V(99, B9, 70, A9), V(FA, 48, 94, 11), V(22, 64, E9, 47), \
V(C4, 8C, FC, A8), V(1A, 3F, F0, A0), V(D8, 2C, 7D, 56), V(EF, 90, 33, 22), \
V(C7, 4E, 49, 87), V(C1, D1, 38, D9), V(FE, A2, CA, 8C), V(36, 0B, D4, 98), \
V(CF, 81, F5, A6), V(28, DE, 7A, A5), V(26, 8E, B7, DA), V(A4, BF, AD, 3F), \
V(E4, 9D, 3A, 2C), V(0D, 92, 78, 50), V(9B, CC, 5F, 6A), V(62, 46, 7E, 54), \
V(C2, 13, 8D, F6), V(E8, B8, D8, 90), V(5E, F7, 39, 2E), V(F5, AF, C3, 82), \
V(BE, 80, 5D, 9F), V(7C, 93, D0, 69), V(A9, 2D, D5, 6F), V(B3, 12, 25, CF), \
V(3B, 99, AC, C8), V(A7, 7D, 18, 10), V(6E, 63, 9C, E8), V(7B, BB, 3B, DB), \
V(09, 78, 26, CD), V(F4, 18, 59, 6E), V(01, B7, 9A, EC), V(A8, 9A, 4F, 83), \
V(65, 6E, 95, E6), V(7E, E6, FF, AA), V(08, CF, BC, 21), V(E6, E8, 15, EF), \
V(D9, 9B, E7, BA), V(CE, 36, 6F, 4A), V(D4, 09, 9F, EA), V(D6, 7C, B0, 29), \
V(AF, B2, A4, 31), V(31, 23, 3F, 2A), V(30, 94, A5, C6), V(C0, 66, A2, 35), \
V(37, BC, 4E, 74), V(A6, CA, 82, FC), V(B0, D0, 90, E0), V(15, D8, A7, 33), \
V(4A, 98, 04, F1), V(F7, DA, EC, 41), V(0E, 50, CD, 7F), V(2F, F6, 91, 17), \
V(8D, D6, 4D, 76), V(4D, B0, EF, 43), V(54, 4D, AA, CC), V(DF, 04, 96, E4), \
V(E3, B5, D1, 9E), V(1B, 88, 6A, 4C), V(B8, 1F, 2C, C1), V(7F, 51, 65, 46), \
V(04, EA, 5E, 9D), V(5D, 35, 8C, 01), V(73, 74, 87, FA), V(2E, 41, 0B, FB), \
V(5A, 1D, 67, B3), V(52, D2, DB, 92), V(33, 56, 10, E9), V(13, 47, D6, 6D), \
V(8C, 61, D7, 9A), V(7A, 0C, A1, 37), V(8E, 14, F8, 59), V(89, 3C, 13, EB), \
V(EE, 27, A9, CE), V(35, C9, 61, B7), V(ED, E5, 1C, E1), V(3C, B1, 47, 7A), \
V(59, DF, D2, 9C), V(3F, 73, F2, 55), V(79, CE, 14, 18), V(BF, 37, C7, 73), \
V(EA, CD, F7, 53), V(5B, AA, FD, 5F), V(14, 6F, 3D, DF), V(86, DB, 44, 78), \
V(81, F3, AF, CA), V(3E, C4, 68, B9), V(2C, 34, 24, 38), V(5F, 40, A3, C2), \
V(72, C3, 1D, 16), V(0C, 25, E2, BC), V(8B, 49, 3C, 28), V(41, 95, 0D, FF), \
V(71, 01, A8, 39), V(DE, B3, 0C, 08), V(9C, E4, B4, D8), V(90, C1, 56, 64), \
V(61, 84, CB, 7B), V(70, B6, 32, D5), V(74, 5C, 6C, 48), V(42, 57, B8, D0)
#define V(a, b, c, d) 0x##a##b##c##d
static const uint32_t RT0[256] = { RT };
#undef V
#define V(a, b, c, d) 0x##b##c##d##a
static const uint32_t RT1[256] = { RT };
#undef V
#define V(a, b, c, d) 0x##c##d##a##b
static const uint32_t RT2[256] = { RT };
#undef V
#define V(a, b, c, d) 0x##d##a##b##c
static const uint32_t RT3[256] = { RT };
#undef V
#undef RT
#define AES_RT0(idx) RT0[idx]
#define AES_RT1(idx) RT1[idx]
#define AES_RT2(idx) RT2[idx]
#define AES_RT3(idx) RT3[idx]
#define AES_FT0(idx) FT0[idx]
#define AES_FT1(idx) FT1[idx]
#define AES_FT2(idx) FT2[idx]
#define AES_FT3(idx) FT3[idx]
/*
* Round constants
*/
static const uint32_t RCON[10] = {
0x00000001, 0x00000002, 0x00000004, 0x00000008,
0x00000010, 0x00000020, 0x00000040, 0x00000080,
0x0000001B, 0x00000036
};
#define AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
do \
{ \
(X0) = *RK++ ^ AES_FT0(MBEDTLS_BYTE_0(Y0)) ^ \
AES_FT1(MBEDTLS_BYTE_1(Y1)) ^ \
AES_FT2(MBEDTLS_BYTE_2(Y2)) ^ \
AES_FT3(MBEDTLS_BYTE_3(Y3)); \
\
(X1) = *RK++ ^ AES_FT0(MBEDTLS_BYTE_0(Y1)) ^ \
AES_FT1(MBEDTLS_BYTE_1(Y2)) ^ \
AES_FT2(MBEDTLS_BYTE_2(Y3)) ^ \
AES_FT3(MBEDTLS_BYTE_3(Y0)); \
\
(X2) = *RK++ ^ AES_FT0(MBEDTLS_BYTE_0(Y2)) ^ \
AES_FT1(MBEDTLS_BYTE_1(Y3)) ^ \
AES_FT2(MBEDTLS_BYTE_2(Y0)) ^ \
AES_FT3(MBEDTLS_BYTE_3(Y1)); \
\
(X3) = *RK++ ^ AES_FT0(MBEDTLS_BYTE_0(Y3)) ^ \
AES_FT1(MBEDTLS_BYTE_1(Y0)) ^ \
AES_FT2(MBEDTLS_BYTE_2(Y1)) ^ \
AES_FT3(MBEDTLS_BYTE_3(Y2)); \
} while (0)
#define AES_RROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
do \
{ \
(X0) = *RK++ ^ AES_RT0(MBEDTLS_BYTE_0(Y0)) ^ \
AES_RT1(MBEDTLS_BYTE_1(Y3)) ^ \
AES_RT2(MBEDTLS_BYTE_2(Y2)) ^ \
AES_RT3(MBEDTLS_BYTE_3(Y1)); \
\
(X1) = *RK++ ^ AES_RT0(MBEDTLS_BYTE_0(Y1)) ^ \
AES_RT1(MBEDTLS_BYTE_1(Y0)) ^ \
AES_RT2(MBEDTLS_BYTE_2(Y3)) ^ \
AES_RT3(MBEDTLS_BYTE_3(Y2)); \
\
(X2) = *RK++ ^ AES_RT0(MBEDTLS_BYTE_0(Y2)) ^ \
AES_RT1(MBEDTLS_BYTE_1(Y1)) ^ \
AES_RT2(MBEDTLS_BYTE_2(Y0)) ^ \
AES_RT3(MBEDTLS_BYTE_3(Y3)); \
\
(X3) = *RK++ ^ AES_RT0(MBEDTLS_BYTE_0(Y3)) ^ \
AES_RT1(MBEDTLS_BYTE_1(Y2)) ^ \
AES_RT2(MBEDTLS_BYTE_2(Y1)) ^ \
AES_RT3(MBEDTLS_BYTE_3(Y0)); \
} while (0)
void mbedtls_aes_init(mbedtls_aes_context *ctx)
{
memset(ctx, 0, sizeof(mbedtls_aes_context));
}
void mbedtls_aes_free(mbedtls_aes_context *ctx)
{
if (ctx == NULL) {
return;
}
mbedtls_platform_zeroize(ctx, sizeof(mbedtls_aes_context));
}
static unsigned mbedtls_aes_rk_offset(uint32_t *buf)
{
(void) buf;
return 0;
}
/**
* Read the unsigned 32 bits integer from the given address, which need not
* be aligned.
*
* \param p pointer to 4 bytes of data
* \return Data at the given address
*/
uint32_t mbedtls_get_unaligned_uint32(const void *p)
{
uint32_t r;
memcpy(&r, p, sizeof(r));
return r;
}
#define MBEDTLS_GET_UINT32_LE(data, offset) \
((MBEDTLS_IS_BIG_ENDIAN) \
? MBEDTLS_BSWAP32(mbedtls_get_unaligned_uint32((data) + (offset))) \
: mbedtls_get_unaligned_uint32((data) + (offset)) \
)
int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, unsigned int keybits)
{
switch (keybits) {
case 128: ctx->nr = 10; break;
case 192: ctx->nr = 12; break;
case 256: ctx->nr = 14; break;
default: return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
}
ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
uint32_t *RK = ctx->buf + ctx->rk_offset;
#if defined(MBEDTLS_AESNI_HAVE_CODE)
if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits);
}
#endif
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
if (mbedtls_aesce_has_support()) {
return mbedtls_aesce_setkey_enc((unsigned char *) RK, key, keybits);
}
#endif
unsigned int i;
for (i = 0; i < (keybits >> 5); i++) {
RK[i] = MBEDTLS_GET_UINT32_LE(key, i << 2);
}
switch (ctx->nr) {
case 10:
for (i = 0; i < 10; i++, RK += 4) {
RK[4] = RK[0] ^ RCON[i] ^
((uint32_t) FSb[MBEDTLS_BYTE_1(RK[3])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(RK[3])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(RK[3])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_0(RK[3])] << 24);
RK[5] = RK[1] ^ RK[4];
RK[6] = RK[2] ^ RK[5];
RK[7] = RK[3] ^ RK[6];
}
break;
case 12:
for (i = 0; i < 8; i++, RK += 6) {
RK[6] = RK[0] ^ RCON[i] ^
((uint32_t) FSb[MBEDTLS_BYTE_1(RK[5])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(RK[5])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(RK[5])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_0(RK[5])] << 24);
RK[7] = RK[1] ^ RK[6];
RK[8] = RK[2] ^ RK[7];
RK[9] = RK[3] ^ RK[8];
RK[10] = RK[4] ^ RK[9];
RK[11] = RK[5] ^ RK[10];
}
break;
case 14:
for (i = 0; i < 7; i++, RK += 8) {
RK[8] = RK[0] ^ RCON[i] ^
((uint32_t) FSb[MBEDTLS_BYTE_1(RK[7])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(RK[7])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(RK[7])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_0(RK[7])] << 24);
RK[9] = RK[1] ^ RK[8];
RK[10] = RK[2] ^ RK[9];
RK[11] = RK[3] ^ RK[10];
RK[12] = RK[4] ^
((uint32_t) FSb[MBEDTLS_BYTE_0(RK[11])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_1(RK[11])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(RK[11])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(RK[11])] << 24);
RK[13] = RK[5] ^ RK[12];
RK[14] = RK[6] ^ RK[13];
RK[15] = RK[7] ^ RK[14];
}
break;
}
return 0;
}
int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, unsigned int keybits)
{
mbedtls_aes_context cty;
mbedtls_aes_init(&cty);
ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
uint32_t *RK = ctx->buf + ctx->rk_offset;
int ret;
/* Also checks keybits */
if ((ret = mbedtls_aes_setkey_enc(&cty, key, keybits)) != 0) {
goto exit;
}
ctx->nr = cty.nr;
#if defined(MBEDTLS_AESNI_HAVE_CODE)
if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
mbedtls_aesni_inverse_key((unsigned char *) RK,
(const unsigned char *) (cty.buf + cty.rk_offset), ctx->nr);
goto exit;
}
#endif
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
if (mbedtls_aesce_has_support()) {
mbedtls_aesce_inverse_key(
(unsigned char *) RK,
(const unsigned char *) (cty.buf + cty.rk_offset),
ctx->nr);
goto exit;
}
#endif
uint32_t *SK = cty.buf + cty.rk_offset + cty.nr * 4;
*RK++ = *SK++;
*RK++ = *SK++;
*RK++ = *SK++;
*RK++ = *SK++;
int i, j;
for (i = ctx->nr - 1, SK -= 8; i > 0; i--, SK -= 8) {
for (j = 0; j < 4; j++, SK++) {
*RK++ = AES_RT0(FSb[MBEDTLS_BYTE_0(*SK)]) ^
AES_RT1(FSb[MBEDTLS_BYTE_1(*SK)]) ^
AES_RT2(FSb[MBEDTLS_BYTE_2(*SK)]) ^
AES_RT3(FSb[MBEDTLS_BYTE_3(*SK)]);
}
}
*RK++ = *SK++;
*RK++ = *SK++;
*RK++ = *SK++;
*RK++ = *SK++;
exit:
mbedtls_aes_free(&cty);
return ret;
}
void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
{
memcpy(p, &x, sizeof(x));
}
/**
* Put in memory a 32 bits unsigned integer in little-endian order.
*
* \param n 32 bits unsigned integer to put in memory.
* \param data Base address of the memory where to put the 32
* bits unsigned integer in.
* \param offset Offset from \p data where to put the least significant
* byte of the 32 bits unsigned integer \p n.
*/
#define MBEDTLS_PUT_UINT32_LE(n, data, offset) \
{ \
if (MBEDTLS_IS_BIG_ENDIAN) \
{ \
mbedtls_put_unaligned_uint32((data) + (offset), MBEDTLS_BSWAP32((uint32_t) (n))); \
} \
else \
{ \
mbedtls_put_unaligned_uint32((data) + (offset), ((uint32_t) (n))); \
} \
}
int mbedtls_internal_aes_encrypt(mbedtls_aes_context *ctx,
const unsigned char input[16],
unsigned char output[16])
{
int i;
uint32_t *RK = ctx->buf + ctx->rk_offset;
struct {
uint32_t X[4];
uint32_t Y[4];
} t;
t.X[0] = MBEDTLS_GET_UINT32_LE(input, 0); t.X[0] ^= *RK++;
t.X[1] = MBEDTLS_GET_UINT32_LE(input, 4); t.X[1] ^= *RK++;
t.X[2] = MBEDTLS_GET_UINT32_LE(input, 8); t.X[2] ^= *RK++;
t.X[3] = MBEDTLS_GET_UINT32_LE(input, 12); t.X[3] ^= *RK++;
for (i = (ctx->nr >> 1) - 1; i > 0; i--) {
AES_FROUND(t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3]);
AES_FROUND(t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3]);
}
AES_FROUND(t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3]);
t.X[0] = *RK++ ^ \
((uint32_t) FSb[MBEDTLS_BYTE_0(t.Y[0])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_1(t.Y[1])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(t.Y[2])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(t.Y[3])] << 24);
t.X[1] = *RK++ ^ \
((uint32_t) FSb[MBEDTLS_BYTE_0(t.Y[1])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_1(t.Y[2])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(t.Y[3])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(t.Y[0])] << 24);
t.X[2] = *RK++ ^ \
((uint32_t) FSb[MBEDTLS_BYTE_0(t.Y[2])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_1(t.Y[3])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(t.Y[0])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(t.Y[1])] << 24);
t.X[3] = *RK++ ^ \
((uint32_t) FSb[MBEDTLS_BYTE_0(t.Y[3])]) ^
((uint32_t) FSb[MBEDTLS_BYTE_1(t.Y[0])] << 8) ^
((uint32_t) FSb[MBEDTLS_BYTE_2(t.Y[1])] << 16) ^
((uint32_t) FSb[MBEDTLS_BYTE_3(t.Y[2])] << 24);
MBEDTLS_PUT_UINT32_LE(t.X[0], output, 0);
MBEDTLS_PUT_UINT32_LE(t.X[1], output, 4);
MBEDTLS_PUT_UINT32_LE(t.X[2], output, 8);
MBEDTLS_PUT_UINT32_LE(t.X[3], output, 12);
mbedtls_platform_zeroize(&t, sizeof(t));
return 0;
}
int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
const unsigned char input[16],
unsigned char output[16])
{
int i;
uint32_t *RK = ctx->buf + ctx->rk_offset;
struct {
uint32_t X[4];
uint32_t Y[4];
} t;
t.X[0] = MBEDTLS_GET_UINT32_LE(input, 0); t.X[0] ^= *RK++;
t.X[1] = MBEDTLS_GET_UINT32_LE(input, 4); t.X[1] ^= *RK++;
t.X[2] = MBEDTLS_GET_UINT32_LE(input, 8); t.X[2] ^= *RK++;
t.X[3] = MBEDTLS_GET_UINT32_LE(input, 12); t.X[3] ^= *RK++;
for (i = (ctx->nr >> 1) - 1; i > 0; i--) {
AES_RROUND(t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3]);
AES_RROUND(t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3]);
}
AES_RROUND(t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3]);
t.X[0] = *RK++ ^ \
((uint32_t) RSb[MBEDTLS_BYTE_0(t.Y[0])]) ^
((uint32_t) RSb[MBEDTLS_BYTE_1(t.Y[3])] << 8) ^
((uint32_t) RSb[MBEDTLS_BYTE_2(t.Y[2])] << 16) ^
((uint32_t) RSb[MBEDTLS_BYTE_3(t.Y[1])] << 24);
t.X[1] = *RK++ ^ \
((uint32_t) RSb[MBEDTLS_BYTE_0(t.Y[1])]) ^
((uint32_t) RSb[MBEDTLS_BYTE_1(t.Y[0])] << 8) ^
((uint32_t) RSb[MBEDTLS_BYTE_2(t.Y[3])] << 16) ^
((uint32_t) RSb[MBEDTLS_BYTE_3(t.Y[2])] << 24);
t.X[2] = *RK++ ^ \
((uint32_t) RSb[MBEDTLS_BYTE_0(t.Y[2])]) ^
((uint32_t) RSb[MBEDTLS_BYTE_1(t.Y[1])] << 8) ^
((uint32_t) RSb[MBEDTLS_BYTE_2(t.Y[0])] << 16) ^
((uint32_t) RSb[MBEDTLS_BYTE_3(t.Y[3])] << 24);
t.X[3] = *RK++ ^ \
((uint32_t) RSb[MBEDTLS_BYTE_0(t.Y[3])]) ^
((uint32_t) RSb[MBEDTLS_BYTE_1(t.Y[2])] << 8) ^
((uint32_t) RSb[MBEDTLS_BYTE_2(t.Y[1])] << 16) ^
((uint32_t) RSb[MBEDTLS_BYTE_3(t.Y[0])] << 24);
MBEDTLS_PUT_UINT32_LE(t.X[0], output, 0);
MBEDTLS_PUT_UINT32_LE(t.X[1], output, 4);
MBEDTLS_PUT_UINT32_LE(t.X[2], output, 8);
MBEDTLS_PUT_UINT32_LE(t.X[3], output, 12);
mbedtls_platform_zeroize(&t, sizeof(t));
return 0;
}
int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
int mode,
const unsigned char input[16],
unsigned char output[16])
{
if (mode != MBEDTLS_AES_ENCRYPT && mode != MBEDTLS_AES_DECRYPT) {
return MBEDTLS_ERR_AES_BAD_INPUT_DATA;
}
#if defined(MBEDTLS_AESNI_HAVE_CODE)
if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
}
#endif
#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
if (mbedtls_aesce_has_support()) {
return mbedtls_aesce_crypt_ecb(ctx, mode, input, output);
}
#endif
if (mode == MBEDTLS_AES_ENCRYPT) {
return mbedtls_internal_aes_encrypt(ctx, input, output);
}
return mbedtls_internal_aes_decrypt(ctx, input, output);
}

206
mbtls_aes.h Normal file
View File

@ -0,0 +1,206 @@
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/* Can we do AESNI with inline assembly?
* (Only implemented with gas syntax, only for 64-bit.)
*/
#if defined(__GNUC__) && (defined(__amd64__) || defined(__x86_64__)) && !defined(MBEDTLS_HAVE_X86_64)
#define MBEDTLS_AESNI_C
#define MBEDTLS_HAVE_X86_64
#endif
#if !defined(MBEDTLS_HAVE_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define MBEDTLS_AESCE_C
#define MBEDTLS_HAVE_ARM64
#endif
#endif
#define MBEDTLS_AES_ENCRYPT 1 /**< AES encryption. */
#define MBEDTLS_AES_DECRYPT 0 /**< AES decryption. */
/* Error codes in range 0x0020-0x0022 */
/** Invalid key length. */
#define MBEDTLS_ERR_AES_INVALID_KEY_LENGTH -0x0020
/** Invalid data input length. */
#define MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH -0x0022
/* Error codes in range 0x0021-0x0025 */
/** Invalid input data. */
#define MBEDTLS_ERR_AES_BAD_INPUT_DATA -0x0021
#if !defined(__BYTE_ORDER__)
static const uint16_t mbedtls_byte_order_detector = { 0x100 };
#define MBEDTLS_IS_BIG_ENDIAN (*((unsigned char *) (&mbedtls_byte_order_detector)) == 0x01)
#else
#define MBEDTLS_IS_BIG_ENDIAN ((__BYTE_ORDER__) == (__ORDER_BIG_ENDIAN__))
#endif /* !defined(__BYTE_ORDER__) */
/*
* Detect GCC built-in byteswap routines
*/
#if defined(__GNUC__) && defined(__GNUC_PREREQ)
#if __GNUC_PREREQ(4, 8)
#define MBEDTLS_BSWAP16 __builtin_bswap16
#endif /* __GNUC_PREREQ(4,8) */
#if __GNUC_PREREQ(4, 3)
#define MBEDTLS_BSWAP32 __builtin_bswap32
#define MBEDTLS_BSWAP64 __builtin_bswap64
#endif /* __GNUC_PREREQ(4,3) */
#endif /* defined(__GNUC__) && defined(__GNUC_PREREQ) */
/*
* Detect Clang built-in byteswap routines
*/
#if defined(__clang__) && defined(__has_builtin)
#if __has_builtin(__builtin_bswap16) && !defined(MBEDTLS_BSWAP16)
#define MBEDTLS_BSWAP16 __builtin_bswap16
#endif /* __has_builtin(__builtin_bswap16) */
#if __has_builtin(__builtin_bswap32) && !defined(MBEDTLS_BSWAP32)
#define MBEDTLS_BSWAP32 __builtin_bswap32
#endif /* __has_builtin(__builtin_bswap32) */
#if __has_builtin(__builtin_bswap64) && !defined(MBEDTLS_BSWAP64)
#define MBEDTLS_BSWAP64 __builtin_bswap64
#endif /* __has_builtin(__builtin_bswap64) */
#endif /* defined(__clang__) && defined(__has_builtin) */
/*
* Detect MSVC built-in byteswap routines
*/
#if defined(_MSC_VER)
#if !defined(MBEDTLS_BSWAP16)
#define MBEDTLS_BSWAP16 _byteswap_ushort
#endif
#if !defined(MBEDTLS_BSWAP32)
#define MBEDTLS_BSWAP32 _byteswap_ulong
#endif
#if !defined(MBEDTLS_BSWAP64)
#define MBEDTLS_BSWAP64 _byteswap_uint64
#endif
#endif /* defined(_MSC_VER) */
/* Detect armcc built-in byteswap routine */
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 410000) && !defined(MBEDTLS_BSWAP32)
#define MBEDTLS_BSWAP32 __rev
#endif
#ifdef __cplusplus
extern "C" {
#endif
/**
* Write the unsigned 32 bits integer to the given address, which need not
* be aligned.
*
* \param p pointer to 4 bytes of data
* \param x data to write
*/
/**
* \brief The AES context-type definition.
*/
typedef struct mbedtls_aes_context {
int nr; /*!< The number of rounds. */
size_t rk_offset; /*!< The offset in array elements to AES round keys in the buffer. */
uint32_t buf[68]; /*!< Unaligned data buffer. This buffer can
hold 32 extra Bytes, which can be used for
one of the following purposes:
<ul><li>Alignment if VIA padlock is
used.</li>
<li>Simplifying key expansion in the 256-bit
case by generating an extra round key.
</li></ul> */
} mbedtls_aes_context;
/**
* \brief This function initializes the specified AES context.
*
* It must be the first API called before using
* the context.
*
* \param ctx The AES context to initialize. This must not be \c NULL.
*/
void mbedtls_aes_init(mbedtls_aes_context *ctx);
/**
* \brief This function releases and clears the specified AES context.
*
* \param ctx The AES context to clear.
* If this is \c NULL, this function does nothing.
* Otherwise, the context must have been at least initialized.
*/
void mbedtls_aes_free(mbedtls_aes_context *ctx);
/**
* \brief This function sets the encryption key.
*
* \param ctx The AES context to which the key should be bound.
* It must be initialized.
* \param key The encryption key.
* This must be a readable buffer of size \p keybits bits.
* \param keybits The size of data passed in bits. Valid options are:
* <ul><li>128 bits</li>
* <li>192 bits</li>
* <li>256 bits</li></ul>
*
* \return \c 0 on success.
* \return #MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on failure.
*/
int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
unsigned int keybits);
/**
* \brief This function sets the decryption key.
*
* \param ctx The AES context to which the key should be bound.
* It must be initialized.
* \param key The decryption key.
* This must be a readable buffer of size \p keybits bits.
* \param keybits The size of data passed. Valid options are:
* <ul><li>128 bits</li>
* <li>192 bits</li>
* <li>256 bits</li></ul>
*
* \return \c 0 on success.
* \return #MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on failure.
*/
int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key,
unsigned int keybits);
/**
* \brief This function performs an AES single-block encryption or
* decryption operation.
*
* It performs the operation defined in the \p mode parameter
* (encrypt or decrypt), on the input data buffer defined in
* the \p input parameter.
*
* mbedtls_aes_init(), and either mbedtls_aes_setkey_enc() or
* mbedtls_aes_setkey_dec() must be called before the first
* call to this API with the same context.
*
* \param ctx The AES context to use for encryption or decryption.
* It must be initialized and bound to a key.
* \param mode The AES operation: #MBEDTLS_AES_ENCRYPT or
* #MBEDTLS_AES_DECRYPT.
* \param input The buffer holding the input data.
* It must be readable and at least \c 16 Bytes long.
* \param output The buffer where the output data will be written.
* It must be writeable and at least \c 16 Bytes long.
* \return \c 0 on success.
*/
int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
int mode,
const unsigned char input[16],
unsigned char output[16]);
#ifdef __cplusplus
}
#endif

View File

@ -9,7 +9,7 @@
#include <limits.h>
/* use configuration data */
#include <tomcrypt_custom.h>
#include "tomcrypt_custom.h"
#ifdef __cplusplus
extern "C" {
@ -63,17 +63,17 @@ enum {
CRYPT_PK_INVALID_PADDING /* Invalid padding on input */
};
#include <tomcrypt_cfg.h>
#include <tomcrypt_macros.h>
#include <tomcrypt_cipher.h>
#include <tomcrypt_hash.h>
#include <tomcrypt_mac.h>
#include <tomcrypt_prng.h>
#include <tomcrypt_pk.h>
#include <tomcrypt_math.h>
#include <tomcrypt_misc.h>
#include <tomcrypt_argchk.h>
#include <tomcrypt_pkcs.h>
#include "tomcrypt_cfg.h"
#include "tomcrypt_macros.h"
#include "tomcrypt_cipher.h"
#include "tomcrypt_hash.h"
#include "tomcrypt_mac.h"
#include "tomcrypt_prng.h"
#include "tomcrypt_pk.h"
#include "tomcrypt_math.h"
#include "tomcrypt_misc.h"
#include "tomcrypt_argchk.h"
#include "tomcrypt_pkcs.h"
#ifdef __cplusplus
}