soft aes implementation
This commit is contained in:
parent
95a3441ca7
commit
8c7a1014ef
|
@ -17,7 +17,9 @@
|
|||
|
||||
#ifdef __MCUXPRESSO
|
||||
#include "fsl_debug_console.h"
|
||||
#include "cr_section_macros.h"
|
||||
#define APP_NOCACHE AT_NONCACHEABLE_SECTION_ALIGN
|
||||
#define APP_RAMFUNC __RAMFUNC(SRAM_ITC)
|
||||
#define APP_PRINTF PRINTF
|
||||
#define CAMERA_BUFFER_ALIGN 64
|
||||
#else
|
||||
|
|
|
@ -1,46 +1,94 @@
|
|||
#include <string.h>
|
||||
#include "aes.h"
|
||||
|
||||
// Timeout constant of the former HAL-based implementation; in this file it is
// only referenced by the commented-out HAL_CRYP_* calls.
#define AES_TIMEOUT 1000
|
||||
// Number of AES rounds for each key size. The value is passed as the `rounds`
// argument of the assembly routines and sizes the round-key buffer.
#define AES_128_KEYROUND 10
|
||||
#define AES_192_KEYROUND 12
|
||||
#define AES_256_KEYROUND 14
|
||||
|
||||
/*
 * Select the assembly AES back end for the target core.
 * _AES(x) prefixes x with the back end's symbol prefix, e.g.
 * _AES(encrypt(...)) becomes CM7_1T_AES_encrypt(...).
 * Both branches test with defined() so that a bare "-DCM4F" or an empty
 * "#define CM4F" selects the Cortex-M3/M4 code instead of producing an
 * invalid "#elif" expression.
 */
#if defined(CM7)
#include "aes/CM7.h"
#define _AES(x) CM7_1T_AES_##x
#elif defined(CM4F)
#include "aes/CM3.h"
#define _AES(x) CM3_1T_AES_##x
#else
#error Unsupported architecture
#endif
|
||||
|
||||
/* All-zero initialisation vector used as the starting chaining value of
 * aes_cmac(); word aligned like the other AES buffers in this file. */
static const uint8_t cmac_iv[AES_IV_SIZE] __attribute__((aligned(4))) = { 0 };
|
||||
|
||||
//static CRYP_HandleTypeDef* _aes;
|
||||
|
||||
/*void aes_init(CRYP_HandleTypeDef* aes) {
|
||||
_aes = aes;
|
||||
}*/
|
||||
/* Expanded key shared by all functions in this file, sized for AES-256
 * (15 round keys of 16 bytes). The assembly key schedules and ciphers access
 * it with multi-word loads and stores (LDM/STM/STRD), so it is explicitly
 * word aligned, consistent with cmac_iv and cmac_tmp. */
static uint8_t round_key[(AES_256_KEYROUND+1)*16] __attribute__((aligned(4)));
|
||||
|
||||
uint8_t aes_encrypt(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out) {
|
||||
//_aes->Init.pKey = (uint32_t*) key;
|
||||
//_aes->Init.pInitVect = (uint32_t*) iv;
|
||||
uint8_t aes_encrypt_cbc(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out) {
|
||||
_AES(256_keyschedule_enc(round_key, key));
|
||||
|
||||
uint32_t* data_in_p = (uint32_t*) data;
|
||||
uint32_t* data_out_p = (uint32_t*) out;
|
||||
uint32_t* iv_p = (uint32_t*) iv;
|
||||
uint32_t blocks_cnt = len / 16;
|
||||
|
||||
for(uint32_t i = 0; i < blocks_cnt ; i++) {
|
||||
data_out_p[0] = data_in_p[0] ^ iv_p[0];
|
||||
data_out_p[1] = data_in_p[1] ^ iv_p[1];
|
||||
data_out_p[2] = data_in_p[2] ^ iv_p[2];
|
||||
data_out_p[3] = data_in_p[3] ^ iv_p[3];
|
||||
|
||||
_AES(encrypt(round_key, (uint8_t *) data_out_p, (uint8_t*) data_out_p, AES_256_KEYROUND));
|
||||
|
||||
iv_p = data_out_p;
|
||||
data_out_p += 4;
|
||||
data_in_p += 4;
|
||||
}
|
||||
|
||||
//return HAL_CRYP_Encrypt(_aes, (uint32_t*)data, len, (uint32_t*)out, AES_TIMEOUT) == HAL_OK;
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint8_t aes_decrypt(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out) {
|
||||
//_aes->Init.pKey = (uint32_t*) key;
|
||||
//_aes->Init.pInitVect = (uint32_t*) iv;
|
||||
uint8_t aes_decrypt_cbc(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out) {
|
||||
_AES(256_keyschedule_enc(round_key, key));
|
||||
_AES(keyschedule_dec(round_key, AES_256_KEYROUND));
|
||||
|
||||
uint32_t* data_out_p = (uint32_t*) out;
|
||||
uint32_t* iv_p = (uint32_t*) iv;
|
||||
uint32_t blocks_cnt = len / 16;
|
||||
|
||||
for(uint32_t i = 0; i < blocks_cnt; i++) {
|
||||
_AES(decrypt(round_key, (uint8_t *) data, (uint8_t *) data_out_p, AES_256_KEYROUND));
|
||||
|
||||
data_out_p[0] ^= iv_p[0];
|
||||
data_out_p[1] ^= iv_p[1];
|
||||
data_out_p[2] ^= iv_p[2];
|
||||
data_out_p[3] ^= iv_p[3];
|
||||
|
||||
iv_p = (uint32_t*)data;
|
||||
data_out_p += 4;
|
||||
data += 16;
|
||||
}
|
||||
|
||||
//return HAL_CRYP_Decrypt(_aes, (uint32_t*)data, len, (uint32_t*)out, AES_TIMEOUT) == HAL_OK;
|
||||
return 1;
|
||||
}
|
||||
|
||||
//TODO: Must be reimplemented completely
|
||||
static uint8_t cmac_tmp[256] __attribute__((aligned(4)));
|
||||
uint8_t aes_cmac(const uint8_t* key, const uint8_t* data, uint32_t len, uint8_t* out) {
|
||||
if (len >= 256) {
|
||||
return 0;
|
||||
}
|
||||
_AES(256_keyschedule_enc(round_key, key));
|
||||
|
||||
if (!aes_encrypt(key, cmac_iv, data, len, cmac_tmp)) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t* data_in_p = (uint32_t*) data;
|
||||
uint32_t* data_out_p = (uint32_t*) out;
|
||||
uint32_t* iv_p = (uint32_t*) cmac_iv;
|
||||
uint32_t blocks_cnt = len / 16;
|
||||
|
||||
memcpy(out, &cmac_tmp[len-AES_IV_SIZE], AES_IV_SIZE);
|
||||
for(uint32_t i = 0; i < blocks_cnt ; i++) {
|
||||
data_out_p[0] = data_in_p[0] ^ iv_p[0];
|
||||
data_out_p[1] = data_in_p[1] ^ iv_p[1];
|
||||
data_out_p[2] = data_in_p[2] ^ iv_p[2];
|
||||
data_out_p[3] = data_in_p[3] ^ iv_p[3];
|
||||
|
||||
_AES(encrypt(round_key, (uint8_t *) data_out_p, (uint8_t*) data_out_p, AES_256_KEYROUND));
|
||||
|
||||
iv_p = data_out_p;
|
||||
data_in_p += 4;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -8,10 +8,8 @@
|
|||
|
||||
//void aes_init(CRYP_HandleTypeDef* aes);
|
||||
|
||||
// Legacy names with the same parameter list as the *_cbc functions below.
// NOTE(review): confirm whether these two prototypes are still needed after
// the rename to aes_encrypt_cbc / aes_decrypt_cbc.
uint8_t aes_encrypt(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out);
|
||||
uint8_t aes_decrypt(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out);
|
||||
// CBC encryption of len bytes from data into out using key and a 16-byte iv.
// Only whole 16-byte blocks are processed. Returns 1.
uint8_t aes_encrypt_cbc(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out);
|
||||
// CBC decryption, the inverse of aes_encrypt_cbc, with the same parameters. Returns 1.
uint8_t aes_decrypt_cbc(const uint8_t* key, const uint8_t* iv, const uint8_t* data, uint32_t len, uint8_t* out);
|
||||
// Writes a 16-byte authentication tag over data (len bytes) to out.
// Returns non-zero on success.
uint8_t aes_cmac(const uint8_t* key, const uint8_t* data, uint32_t len, uint8_t* out);
|
||||
|
||||
/* Imports a parameter by passing it through rev32_all(), viewing both buffers
 * as arrays of 32-bit words. NOTE(review): rev32_all presumably byte-reverses
 * each word -- confirm against its definition. Arguments are parenthesised so
 * that expressions such as `buf + 4` are evaluated before the cast. */
#define aes_import_param(__OUT__, __IN__, __LEN__) rev32_all((uint32_t*)(__OUT__), (uint32_t*)(__IN__), (__LEN__))
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/*!
|
||||
* \file CM3.h
|
||||
* \brief FIPS 197 compliant software AES implementations optimized for cortex-m3/4
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 Jun 2018
|
||||
*/
|
||||
|
||||
#ifndef AES_CM3_H
|
||||
#define AES_CM3_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// Key expansion for encryption. rk receives the original key followed by the
// derived round keys: 176 bytes for AES-128, 208 for AES-192 and 240 for
// AES-256. The assembly accesses key and rk with multi-word loads and stores.
void CM3_1T_AES_128_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
void CM3_1T_AES_192_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
void CM3_1T_AES_256_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
|
||||
// Converts an expanded encryption key in place for use by CM3_1T_AES_decrypt.
// The first and last round keys are left untouched. rounds is 10, 12 or 14.
void CM3_1T_AES_keyschedule_dec(uint8_t* rk, size_t rounds);
|
||||
|
||||
// Single 16-byte block operations. rk is the expanded key (for decrypt, after
// CM3_1T_AES_keyschedule_dec), rounds is 10, 12 or 14.
void CM3_1T_AES_encrypt(const uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds);
|
||||
void CM3_1T_AES_decrypt(const uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds);
|
||||
|
||||
#endif // AES_CM3_H
|
|
@ -0,0 +1,69 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_128_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
// 10 rounds of rcon can be computed as left shift + conditional reload of rcon to 0x1b after 0x80
|
||||
// it can also serve as loop counter to reduce register pressure
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Expands a 128-bit AES key into the encryption round keys.
//   r0 (rk)  - destination; receives the 16-byte key followed by ten 16-byte
//              round keys (176 bytes in total), written with STM
//   r1 (key) - source key, read once with LDM
// Inside the loop r2-r5 hold the current round key, r1 holds rcon in its top
// byte and r14 points at AES_Te2, which is used as a byte S-box by loading
// one byte from each 4-byte table entry.
// void CM3_1T_AES_128_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM3_1T_AES_128_keyschedule_enc
|
||||
.type CM3_1T_AES_128_keyschedule_enc,%function
|
||||
CM3_1T_AES_128_keyschedule_enc:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
push {r4-r8, lr}
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
ldmia.w r1, {r2-r5} // load key once // align loop entry to 8 bytes
|
||||
mov.w r1, #0x01000000 // calculate rcon in highest byte to use a carry flag
|
||||
|
||||
//just copy a key
|
||||
stmia.w r0!, {r2-r5} // align loop entry to 8 bytes
|
||||
|
||||
// split the last word of the previous round key into bytes, rotated by one byte
1: uxtb r6, r5, ror #8
|
||||
uxtb r7, r5, ror #16
|
||||
uxtb r8, r5, ror #24
|
||||
uxtb r12, r5
|
||||
|
||||
ldrb r6, [r14, r6, lsl #2] // load sbox from Te2
|
||||
ldrb r7, [r14, r7, lsl #2] // load sbox from Te2
|
||||
ldrb r8, [r14, r8, lsl #2] // load sbox from Te2
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
eors r2, r2, r6
|
||||
eor r2, r2, r7, lsl #8
|
||||
eor r2, r2, r8, lsl #16
|
||||
eor r2, r2, r12, lsl #24
|
||||
// remaining three words are each XORed with the word just produced
eors r3, r2
|
||||
eors r4, r3
|
||||
eors r5, r4
|
||||
|
||||
lsls r1, #1 // next rcon
|
||||
|
||||
it cs // 0x1b reduction when carry set
|
||||
movcs r1, #0x1b000000
|
||||
|
||||
// rcon reaches 0x6c only after the tenth round key (0x1b -> 0x36 -> 0x6c)
cmp r1, #0x6c000000
|
||||
|
||||
//write roundkey
|
||||
stmia r0!, {r2-r5}
|
||||
|
||||
bne 1b
|
||||
|
||||
pop {r4-r8, pc}
|
||||
#else
|
||||
b . // crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,66 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_192_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
// 8 rounds of rcon can be computed as left shift only
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Expands a 192-bit AES key into the encryption round keys.
//   r0 (rk)  - destination; receives the 24-byte key, then seven 24-byte
//              groups and a final 16-byte group (208 bytes in total)
//   r1 (key) - source key, read once with LDM
// Inside the loop r2-r7 hold the current six key words, r1 holds rcon in its
// top byte and r14 points at AES_Te2, used as a byte S-box.
// void CM3_1T_AES_192_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM3_1T_AES_192_keyschedule_enc
|
||||
.type CM3_1T_AES_192_keyschedule_enc,%function
|
||||
CM3_1T_AES_192_keyschedule_enc:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
push {r4-r10, lr}
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
ldmia.w r1, {r2-r7} // load key // align loop entry to 8 bytes
|
||||
mov.w r1, #0x01000000 // calculate rcon in highest byte to use a carry flag
|
||||
|
||||
//just copy a key
|
||||
stmia.w r0!, {r2-r7} // align loop entry to 8 bytes
|
||||
|
||||
// split the last key word into bytes, rotated by one byte
1: uxtb r8, r7, ror #8
|
||||
uxtb r9, r7, ror #16
|
||||
uxtb r10, r7, ror #24
|
||||
uxtb r12, r7
|
||||
|
||||
ldrb r8, [r14, r8, lsl #2] // load sbox from Te2
|
||||
ldrb r9, [r14, r9, lsl #2] // load sbox from Te2
|
||||
ldrb r10, [r14, r10, lsl #2] // load sbox from Te2
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
eor r2, r2, r8
|
||||
eor r2, r2, r9, lsl #8
|
||||
eor r2, r2, r10, lsl #16
|
||||
eor r2, r2, r12, lsl #24
|
||||
eors r3, r2
|
||||
eors r4, r3
|
||||
eors r5, r4
|
||||
|
||||
// carry is set on the eighth shift, when rcon leaves the top byte
lsls r1, #1 // next rcon
|
||||
bcs 2f // last round when carry is set
|
||||
|
||||
eors r6, r5
|
||||
eors r7, r6
|
||||
|
||||
stmia r0!, {r2-r7} // write full roundkey
|
||||
b 1b
|
||||
|
||||
2: stmia r0!, {r2-r5} // write only 4 words at the last round
|
||||
pop {r4-r10, pc}
|
||||
#else
|
||||
b . // crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,97 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_256_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
// 7 rounds of rcon can be computed as left shift only
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Expands a 256-bit AES key into the encryption round keys.
//   r0 (rk)  - destination; receives seven 32-byte groups (the first being
//              the key itself) and a final 16-byte group (240 bytes in total)
//   r1 (key) - source key, read once with LDM
// r2-r9 hold the current eight key words. r1 is shared between the AES_Te2
// table address and rcon, so both are kept in an 8-byte stack area
// ([sp] = Te2 address, [sp, #4] = rcon) and reloaded as needed.
// void CM3_1T_AES_256_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM3_1T_AES_256_keyschedule_enc
|
||||
.type CM3_1T_AES_256_keyschedule_enc,%function
|
||||
CM3_1T_AES_256_keyschedule_enc:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
push {r4-r11, lr}
|
||||
|
||||
ldmia r1!, {r2-r9} // load key
|
||||
|
||||
movw r1, #:lower16:AES_Te2
|
||||
movt r1, #:upper16:AES_Te2
|
||||
str r1, [sp, #-8]! // put and reserve Te2 on stack before looping
|
||||
|
||||
mov.w r1, #0x01000000 // calculate rcon in highest byte to use a carry flag
|
||||
|
||||
//sp+0 - Te2
|
||||
//sp+4 - rcon
|
||||
|
||||
1: stmia r0!, {r2-r9} // store initial or previous round
|
||||
|
||||
eor r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
// this shift sets the N flag tested by the bmi below; no instruction in between updates the flags
lsls r1, #1 // next rcon
|
||||
str r1, [sp, #4] // spill rcon
|
||||
|
||||
uxtb r10, r9, ror #8
|
||||
uxtb r11, r9, ror #16
|
||||
uxtb r12, r9, ror #24
|
||||
uxtb r14, r9
|
||||
|
||||
ldr.w r1, [sp, #0] // get Te2 // cannot be pipelined anyway // align loads to 4 bytes
|
||||
|
||||
ldrb r10, [r1, r10, lsl #2] // load sbox from Te2
|
||||
ldrb r11, [r1, r11, lsl #2] // load sbox from Te2
|
||||
ldrb r12, [r1, r12, lsl #2] // load sbox from Te2
|
||||
ldrb r14, [r1, r14, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r10
|
||||
eor r2, r2, r11, lsl #8
|
||||
eor r2, r2, r12, lsl #16
|
||||
eor r2, r2, r14, lsl #24
|
||||
|
||||
// instead of 3x eor.w + bmi.w, put eors after branch and in epilogue // somehow saves 6 cycles at 0ws
|
||||
bmi 2f
|
||||
|
||||
eors r3, r2
|
||||
eors r4, r3
|
||||
eors r5, r4
|
||||
|
||||
// second half: S-box of r5 without byte rotation and without rcon, applied to r6
uxtb r10, r5, ror #16
|
||||
uxtb r11, r5, ror #8
|
||||
uxtb r12, r5
|
||||
uxtb r14, r5, ror #24
|
||||
|
||||
ldrb r10, [r1, r10, lsl #2] // load sbox from Te2
|
||||
ldrb r11, [r1, r11, lsl #2] // load sbox from Te2
|
||||
ldrb r12, [r1, r12, lsl #2] // load sbox from Te2
|
||||
ldrb r14, [r1, r14, lsl #2] // load sbox from Te2
|
||||
ldr.w r1, [sp, #4] // get rcon // will lose cycles if not .w
|
||||
|
||||
eor r6, r6, r10, lsl #16
|
||||
eor r6, r6, r11, lsl #8
|
||||
eor r6, r12
|
||||
eor r6, r6, r14, lsl #24
|
||||
eors r7, r6
|
||||
eor r8, r7
|
||||
eor r9, r8
|
||||
|
||||
b 1b
|
||||
|
||||
2: eors r3, r2
|
||||
eors r4, r3
|
||||
eors r5, r4
|
||||
|
||||
stmia r0!, {r2-r5} // write only 4 words at the last round
|
||||
|
||||
add sp, #8 // faster than dummy pops
|
||||
pop {r4-r11, pc}
|
||||
#else
|
||||
b . // crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,203 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_decrypt.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* Peter Schwabe & Ko Stoffelen @2016
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Decrypts one 16-byte block with a single rotated lookup table (AES_Td2).
//   r0 (rk)     - round keys, read from the end towards the start; expected
//                 to have been processed by CM3_1T_AES_keyschedule_dec
//   r1 (in)     - 16-byte input block, read with LDM
//   r2 (out)    - 16-byte output block
//   r3 (rounds) - number of rounds
// Stack layout during the routine: [sp] = rk+16 (loop end marker),
// [sp, #4] = out. r4-r7 hold the state, r8-r11 the next state being built,
// r12 walks the round keys downwards and r14 points at the lookup table.
// The final round uses the byte table AES_inv_sbox and the round key at rk.
// void CM3_1T_AES_decrypt(uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds) {
|
||||
.global CM3_1T_AES_decrypt
|
||||
.type CM3_1T_AES_decrypt,%function
|
||||
CM3_1T_AES_decrypt:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
adds r0, #16 //to compare against before final round
|
||||
push {r0, r2, r4-r11, lr} //stack rk+16, out
|
||||
|
||||
movw r14, #:lower16:AES_Td2
|
||||
movt r14, #:upper16:AES_Td2
|
||||
|
||||
//rk_end = rk+16 + rounds * 16
|
||||
add r12, r0, r3, lsl #4
|
||||
|
||||
//load input
|
||||
ldmia r1!, {r4-r7}
|
||||
|
||||
//load initial round key
|
||||
ldmdb r12!, {r0-r3}
|
||||
|
||||
//initial addroundkey
|
||||
eors r4, r0
|
||||
eors r5, r1
|
||||
eors r6, r2
|
||||
eors r7, r3
|
||||
|
||||
1: uxtb r0, r4
|
||||
uxtb r1, r5
|
||||
uxtb r2, r6
|
||||
uxtb r3, r7
|
||||
|
||||
#ifdef __ARM_ARCH_7EM__
|
||||
// aggregate loads by source in case it lies in different memory blocks
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
ldr r9, [r12, #-12]
|
||||
ldr r10, [r12, #-8]
|
||||
ldr r11, [r12, #-4]
|
||||
ldr r8, [r12, #-16]!
|
||||
#else // cm3 can't pre index anywhere but first load
|
||||
ldr r8, [r12, #-16]!
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
ldr r9, [r12, #4]
|
||||
ldr r10, [r12, #8]
|
||||
ldr r11, [r12, #12]
|
||||
#endif
|
||||
|
||||
// r8-r11 start as the round key; each table word is XORed in with a rotation
eor r8, r8, r0, ror #16
|
||||
eor r9, r9, r1, ror #16
|
||||
eor r10, r10, r2, ror #16
|
||||
eor r11, r11, r3, ror #16
|
||||
|
||||
uxtb r0, r7, ror #8
|
||||
uxtb r1, r4, ror #8
|
||||
uxtb r2, r5, ror #8
|
||||
uxtb r3, r6, ror #8
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
eor r8, r8, r0, ror #8
|
||||
eor r9, r9, r1, ror #8
|
||||
eor r10, r10, r2, ror #8
|
||||
eor r11, r11, r3, ror #8
|
||||
|
||||
uxtb r0, r6, ror #16
|
||||
uxtb r1, r7, ror #16
|
||||
uxtb r2, r4, ror #16
|
||||
uxtb r3, r5, ror #16
|
||||
|
||||
lsrs r5, #24
|
||||
lsrs r6, #24
|
||||
lsrs r7, #24
|
||||
lsrs r4, #24
|
||||
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
|
||||
ldr r5, [r14, r5, lsl #2]
|
||||
ldr r6, [r14, r6, lsl #2]
|
||||
ldr r7, [r14, r7, lsl #2]
|
||||
ldr r4, [r14, r4, lsl #2]
|
||||
|
||||
// change xoring order to writeback r4-r7 without extra moves
|
||||
eor r0, r0, r5, ror #24
|
||||
eor r1, r1, r6, ror #24
|
||||
|
||||
// set flags early to optimize speculative fetches in cm3
|
||||
// cmp has to be close to the branch, otherwise speculative code loads don't work
|
||||
// [sp] holds rk+16; the loop ends when the key pointer has come down to it
ldr r5, [sp]
|
||||
cmp r5, r12
|
||||
|
||||
eor r2, r2, r7, ror #24
|
||||
eor r3, r3, r4, ror #24
|
||||
|
||||
eor r4, r8, r0
|
||||
eor r5, r9, r1
|
||||
eor r6, r10, r2
|
||||
eor r7, r11, r3
|
||||
|
||||
bne.w 1b //align following code to 4 bytes
|
||||
|
||||
// final round
|
||||
movw r14, #:lower16:AES_inv_sbox
|
||||
movt r14, #:upper16:AES_inv_sbox
|
||||
|
||||
uxtb r0, r4
|
||||
uxtb r1, r5
|
||||
uxtb r2, r6
|
||||
uxtb r3, r7
|
||||
ldr r8, [r12, #-16]
|
||||
ldr r9, [r12, #-12]
|
||||
ldr r10, [r12, #-8]
|
||||
ldr r11, [r12, #-4]
|
||||
ldrb r0, [r14, r0]
|
||||
ldrb r1, [r14, r1]
|
||||
ldrb r2, [r14, r2]
|
||||
ldrb r3, [r14, r3]
|
||||
eor r8, r0
|
||||
eor r9, r1
|
||||
eor r10, r2
|
||||
eor r11, r3
|
||||
|
||||
uxtb r0, r7, ror #8
|
||||
uxtb r1, r4, ror #8
|
||||
uxtb r2, r5, ror #8
|
||||
uxtb r3, r6, ror #8
|
||||
ldrb r0, [r14, r0]
|
||||
ldrb r1, [r14, r1]
|
||||
ldrb r2, [r14, r2]
|
||||
ldrb r3, [r14, r3]
|
||||
eor r8, r8, r0, lsl #8
|
||||
eor r9, r9, r1, lsl #8
|
||||
eor r10, r10, r2, lsl #8
|
||||
eor r11, r11, r3, lsl #8
|
||||
|
||||
uxtb r0, r6, ror #16
|
||||
uxtb r1, r7, ror #16
|
||||
uxtb r2, r4, ror #16
|
||||
uxtb r3, r5, ror #16
|
||||
|
||||
lsrs r5, #24
|
||||
lsrs r6, #24
|
||||
lsrs r7, #24
|
||||
|
||||
uxtb r12, r4, ror #24
|
||||
|
||||
ldr r4, [sp, #4] // load output pointer
|
||||
|
||||
ldrb r0, [r14, r0]
|
||||
ldrb r1, [r14, r1]
|
||||
ldrb r2, [r14, r2]
|
||||
ldrb r3, [r14, r3]
|
||||
|
||||
ldrb r5, [r14, r5]
|
||||
ldrb r6, [r14, r6]
|
||||
ldrb r7, [r14, r7]
|
||||
ldrb r12, [r14, r12]
|
||||
|
||||
eor r8, r8, r0, lsl #16
|
||||
eor r9, r9, r1, lsl #16
|
||||
eor r10, r10, r2, lsl #16
|
||||
eor r11, r11, r3, lsl #16
|
||||
|
||||
eor r0, r8, r5, lsl #24
|
||||
eor r1, r9, r6, lsl #24
|
||||
eor r2, r10, r7, lsl #24
|
||||
eor r3, r11, r12, lsl #24
|
||||
|
||||
add sp, #8 //less mem pressure than preindexed load + dummy pop
|
||||
|
||||
str r0, [r4, #0]
|
||||
str r1, [r4, #4]
|
||||
str r2, [r4, #8]
|
||||
str r3, [r4, #12]
|
||||
|
||||
pop {r4-r11, pc}
|
||||
#else
|
||||
b . // crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,206 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_encrypt.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* Peter Schwabe & Ko Stoffelen @2016
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Encrypts one 16-byte block with a single rotated lookup table (AES_Te2).
//   r0 (rk)     - expanded encryption key, read from the start upwards
//   r1 (in)     - 16-byte input block, read with LDM
//   r2 (out)    - 16-byte output block
//   r3 (rounds) - number of rounds
// Stack layout during the routine: [sp] = out, [sp, #4] = rk + rounds*16
// (loop end marker). r4-r7 hold the state, r8-r11 the next state being built,
// r12 walks the round keys upwards and r14 points at AES_Te2.
// The final round loads single bytes from AES_Te2 as the S-box and uses the
// round key at rk + rounds*16.
// void CM3_1T_AES_encrypt(uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds) {
|
||||
.global CM3_1T_AES_encrypt
|
||||
.type CM3_1T_AES_encrypt,%function
|
||||
CM3_1T_AES_encrypt:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
add r3, r0, r3, lsl #4 //rk_end-16 = rk + rounds * 16
|
||||
push {r2,r3,r4-r11,lr} //stack out, rk_end-16
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
mov r12, r0
|
||||
|
||||
//load input
|
||||
ldmia r1!, {r4-r7}
|
||||
//load key
|
||||
ldmia r12!, {r0-r3}
|
||||
|
||||
//initial addroundkey
|
||||
eors r4, r0
|
||||
eors r5, r1
|
||||
eors r6, r2
|
||||
eors r7, r3
|
||||
|
||||
1: uxtb r0, r4
|
||||
uxtb r1, r5
|
||||
uxtb r2, r6
|
||||
uxtb r3, r7
|
||||
|
||||
#ifdef __ARM_ARCH_7EM__
|
||||
// aggregate loads by source in case it lies in different memory blocks
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
ldr r9, [r12, #4]
|
||||
ldr r10, [r12, #8]
|
||||
ldr r11, [r12, #12]
|
||||
ldr r8, [r12], #16
|
||||
#else // cm3 can't post index anywhere but first load
|
||||
ldr r8, [r12], #16
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
ldr r9, [r12, #-12]
|
||||
ldr r10, [r12, #-8]
|
||||
ldr r11, [r12, #-4]
|
||||
#endif
|
||||
|
||||
// r8-r11 start as the round key; each table word is XORed in with a rotation
eor r8, r8, r0, ror #16
|
||||
eor r9, r9, r1, ror #16
|
||||
eor r10, r10, r2, ror #16
|
||||
eor r11, r11, r3, ror #16
|
||||
|
||||
uxtb r0, r5, ror #8
|
||||
uxtb r1, r6, ror #8
|
||||
uxtb r2, r7, ror #8
|
||||
uxtb r3, r4, ror #8
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
eor r8, r8, r0, ror #8
|
||||
eor r9, r9, r1, ror #8
|
||||
eor r10, r10, r2, ror #8
|
||||
eor r11, r11, r3, ror #8
|
||||
|
||||
uxtb r0, r6, ror #16
|
||||
uxtb r1, r7, ror #16
|
||||
uxtb r2, r4, ror #16
|
||||
uxtb r3, r5, ror #16
|
||||
|
||||
lsrs r7, #24
|
||||
lsrs r4, #24
|
||||
lsrs r5, #24
|
||||
lsrs r6, #24
|
||||
|
||||
ldr r0, [r14, r0, lsl #2]
|
||||
ldr r1, [r14, r1, lsl #2]
|
||||
ldr r2, [r14, r2, lsl #2]
|
||||
ldr r3, [r14, r3, lsl #2]
|
||||
|
||||
ldr r7, [r14, r7, lsl #2]
|
||||
ldr r4, [r14, r4, lsl #2]
|
||||
ldr r5, [r14, r5, lsl #2]
|
||||
ldr r6, [r14, r6, lsl #2]
|
||||
|
||||
// change xoring order to writeback r4-r7 without extra moves
|
||||
eor r0, r0, r7, ror #24
|
||||
eor r1, r1, r4, ror #24
|
||||
|
||||
// set flags early to optimize speculative fetches in cm3
|
||||
// cmp has to be close to the branch, otherwise speculative code loads don't work
|
||||
// [sp, #4] holds rk + rounds*16; the loop ends when the key pointer reaches it
ldr r7, [sp, #4]
|
||||
cmp r7, r12
|
||||
|
||||
eor r2, r2, r5, ror #24
|
||||
eor r3, r3, r6, ror #24
|
||||
|
||||
eor r4, r8, r0
|
||||
eor r5, r9, r1
|
||||
eor r6, r10, r2
|
||||
eor r7, r11, r3
|
||||
|
||||
bne.w 1b //align following code to 4 bytes
|
||||
|
||||
//final round
|
||||
//row 3 - ST3x
|
||||
lsrs r0, r7, #24
|
||||
lsrs r1, r4, #24
|
||||
lsrs r2, r5, #24
|
||||
lsrs r3, r6, #24
|
||||
|
||||
//row 2 - ST2x
|
||||
uxtb r8, r6, ror #16
|
||||
uxtb r9, r7, ror #16
|
||||
uxtb r10, r4, ror #16
|
||||
uxtb r11, r5, ror #16
|
||||
|
||||
ldrb r0, [r14, r0, lsl #2]
|
||||
ldrb r1, [r14, r1, lsl #2]
|
||||
ldrb r2, [r14, r2, lsl #2]
|
||||
ldrb r3, [r14, r3, lsl #2]
|
||||
|
||||
ldrb r8, [r14, r8, lsl #2]
|
||||
ldrb r9, [r14, r9, lsl #2]
|
||||
ldrb r10, [r14, r10, lsl #2]
|
||||
ldrb r11, [r14, r11, lsl #2]
|
||||
|
||||
//repack upper part (keep in bottom half)
|
||||
orr r8, r8, r0, lsl #8
|
||||
orr r9, r9, r1, lsl #8
|
||||
orr r10, r10, r2, lsl #8
|
||||
orr r11, r11, r3, lsl #8
|
||||
|
||||
//row 1 - ST1x
|
||||
uxtb r0, r5, ror #8
|
||||
uxtb r1, r6, ror #8
|
||||
uxtb r2, r7, ror #8
|
||||
uxtb r3, r4, ror #8
|
||||
|
||||
//row 0 - ST0x
|
||||
uxtb r4, r4
|
||||
uxtb r5, r5
|
||||
uxtb r6, r6
|
||||
uxtb r7, r7
|
||||
|
||||
ldrb r0, [r14, r0, lsl #2]
|
||||
ldrb r1, [r14, r1, lsl #2]
|
||||
ldrb r2, [r14, r2, lsl #2]
|
||||
ldrb r3, [r14, r3, lsl #2]
|
||||
|
||||
ldrb r4, [r14, r4, lsl #2]
|
||||
ldrb r5, [r14, r5, lsl #2]
|
||||
ldrb r6, [r14, r6, lsl #2]
|
||||
ldrb r7, [r14, r7, lsl #2]
|
||||
|
||||
//repack bottom part
|
||||
orr r4, r4, r0, lsl #8
|
||||
orr r5, r5, r1, lsl #8
|
||||
orr r6, r6, r2, lsl #8
|
||||
orr r7, r7, r3, lsl #8
|
||||
|
||||
//repack wholly
|
||||
orr r4, r4, r8, lsl #16
|
||||
orr r5, r5, r9, lsl #16
|
||||
orr r6, r6, r10, lsl #16
|
||||
orr r8, r7, r11, lsl #16 // unstack into r7
|
||||
|
||||
ldr r7, [sp], #8 // load output pointer and clear stack
|
||||
// last round key
ldr r0, [r12]
|
||||
ldr r1, [r12, #4]
|
||||
ldr r2, [r12, #8]
|
||||
ldr r3, [r12, #12]
|
||||
|
||||
eors r0, r4
|
||||
eors r1, r5
|
||||
eors r2, r6
|
||||
eor.w r3, r8
|
||||
|
||||
str r0, [r7, #0]
|
||||
str r1, [r7, #4]
|
||||
str r2, [r7, #8]
|
||||
str r3, [r7, #12]
|
||||
|
||||
pop {r4-r11,pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,67 @@
|
|||
/*!
|
||||
* \file CM3_1T_AES_keyschedule_dec.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 9 jun 2018
|
||||
*/
|
||||
|
||||
// performs the equivalent inverse cipher transformation on the expanded encryption key
|
||||
// order of round keys is not inverted - decryption will read it in reverse
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
|
||||
.align 3
|
||||
// Converts an expanded encryption key in place for use by the decryption
// routine.
//   r0 (rk)     - expanded encryption key, modified in place
//   r1 (rounds) - number of rounds
// Words are processed one at a time from rk + rounds*16 - 4 down to rk + 16,
// so the first and last round keys are left untouched. Each byte of a word is
// mapped through the S-box byte of AES_Te2 (r14) and the result indexes
// AES_Td2 (r12); the four table words are combined with rotations.
// void CM3_1T_AES_keyschedule_dec(uint8_t* rk, size_t rounds) {
|
||||
.global CM3_1T_AES_keyschedule_dec
|
||||
.type CM3_1T_AES_keyschedule_dec,%function
|
||||
CM3_1T_AES_keyschedule_dec:
|
||||
#if __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
|
||||
push {r4-r5, lr}
|
||||
|
||||
//first and last blocks are omitted
|
||||
//rk_end-16 = rk + rounds * 16
|
||||
add r1, r0, r1, lsl #4
|
||||
adds r0, #16
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
movw r12, #:lower16:AES_Td2
|
||||
movt r12, #:upper16:AES_Td2
|
||||
|
||||
1: ldr r2, [r1, #-4]! // can also go forward on r0 but requires negative offset on store
|
||||
|
||||
uxtb r4, r2, ror #8
|
||||
uxtb r3, r2
|
||||
lsrs r5, r2, #24
|
||||
uxtb r2, r2, ror #16
|
||||
|
||||
ldrb r3, [r14, r3, lsl #2] // load sbox from Te2
|
||||
ldrb r4, [r14, r4, lsl #2] // load sbox from Te2
|
||||
ldrb r2, [r14, r2, lsl #2] // load sbox from Te2
|
||||
ldrb r5, [r14, r5, lsl #2] // load sbox from Te2
|
||||
ldr r3, [r12, r3, lsl #2]
|
||||
ldr r4, [r12, r4, lsl #2]
|
||||
ldr r2, [r12, r2, lsl #2]
|
||||
ldr r5, [r12, r5, lsl #2]
|
||||
|
||||
// set flags early to optimize speculative fetches in cm3
|
||||
// cmp has to be close to the branch, otherwise speculative code loads don't work
|
||||
cmp r1, r0
|
||||
|
||||
eor r2, r2, r3, ror #16
|
||||
eor r2, r2, r4, ror #8
|
||||
eor r2, r2, r5, ror #24
|
||||
|
||||
str r2, [r1] // write back transformed key
|
||||
|
||||
bne 1b
|
||||
|
||||
pop {r4-r5, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,26 @@
|
|||
/*!
|
||||
* \file CM7.h
|
||||
* \brief FIPS 197 compliant software AES implementations optimized for cortex-m7
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 Jun 2018
|
||||
*/
|
||||
|
||||
#ifndef AES_CM7_H
|
||||
#define AES_CM7_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "common.h"
|
||||
|
||||
// All routines carry APP_RAMFUNC (from common.h) so the declarations match
// the RAM-function section used by the assembly implementations.

// Key expansion for encryption: rk receives the original key followed by the
// derived round keys. The assembly accesses key and rk with multi-word loads
// and stores.
APP_RAMFUNC void CM7_1T_AES_128_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
APP_RAMFUNC void CM7_1T_AES_192_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
APP_RAMFUNC void CM7_1T_AES_256_keyschedule_enc(uint8_t* rk, const uint8_t* key);
|
||||
|
||||
// Converts an expanded encryption key in place for decryption.
// NOTE(review): presumably mirrors CM3_1T_AES_keyschedule_dec -- confirm.
APP_RAMFUNC void CM7_1T_AES_keyschedule_dec(uint8_t* rk, size_t rounds);
|
||||
|
||||
// Single-block operations taking the expanded key, input block, output block
// and round count.
APP_RAMFUNC void CM7_1T_AES_encrypt(const uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds);
|
||||
APP_RAMFUNC void CM7_1T_AES_decrypt(const uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds);
|
||||
|
||||
#endif // AES_CM7_H
|
|
@ -0,0 +1,84 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_128_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 Jun 2018
|
||||
*/
|
||||
|
||||
// 10 rounds of rcon can be computed as left shift + conditional reload of rcon to 0x1b after 0x80
|
||||
// it can also serve as loop counter to reduce register pressure
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// Expands a 128-bit AES key into the encryption round keys (Cortex-M7
// scheduling of the same algorithm as the CM3 variant).
//   r0 (rk)  - destination; receives the 16-byte key followed by ten 16-byte
//              round keys (176 bytes in total), one word per store
//   r1 (key) - source key, read once with LDM
// r2-r5 hold the current round key, r14 points at AES_Te2 (used as a byte
// S-box) and r1 holds rcon replicated in every byte; only the top byte is
// consumed. The last word of each round key (r5) is stored at the start of
// the following iteration, or after the loop for the final one.
// void CM7_1T_AES_128_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM7_1T_AES_128_keyschedule_enc
|
||||
.type CM7_1T_AES_128_keyschedule_enc,%function
|
||||
CM7_1T_AES_128_keyschedule_enc:
|
||||
#if __ARM_ARCH_7EM__
|
||||
push {r4-r8, lr}
|
||||
|
||||
//load key once
|
||||
ldmia.w r1, {r2-r5} // align loop entry
|
||||
|
||||
// calculate rcon in highest byte to use a carry flag
|
||||
// use constructed immediate rather than shifted one for more issuable constants
|
||||
mov.w r1, #0x01010101
|
||||
str.w r2, [r0], #4 // just copy the key
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
str.w r3, [r0], #4
|
||||
|
||||
movt r14, #:upper16:AES_Te2
|
||||
str.w r4, [r0], #4 // r5 stored inside of the loop
|
||||
|
||||
1: and.w r12, r5, #0xff
|
||||
eor r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
|
||||
lsr.w r8, r5, #24
|
||||
lsls r1, #1
|
||||
|
||||
uxtb r7, r5, ror #16
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
uxtb r6, r5, ror #8
|
||||
ldrb r8, [r14, r8, lsl #2] // load sbox from Te2
|
||||
|
||||
// better than `it` instruction // works the same in younger and older slot
|
||||
// needs 2 cycle clearance from lsls, .n instruction within 3 cycles above
|
||||
// carry from the lsls above means rcon overflowed past 0x80: reload it with 0x1b
bcc 2f
|
||||
mov r1, #0x1b1b1b1b
|
||||
2: ldrb r7, [r14, r7, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r12, lsl #24
|
||||
ldrb r6, [r14, r6, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r8, lsl #16
|
||||
str r5, [r0], #4 // from a previous loop or prologue
|
||||
|
||||
eor r2, r2, r7, lsl #8
|
||||
// rcon reaches 0x6c only after the tenth round key (0x1b -> 0x36 -> 0x6c)
cmp.w r1, #0x6c6c6c6c
|
||||
|
||||
eor.w r2, r6
|
||||
str r2, [r0], #4
|
||||
|
||||
eor.w r3, r2
|
||||
str r3, [r0], #4
|
||||
|
||||
eor.w r4, r3
|
||||
str r4, [r0], #4
|
||||
|
||||
eor.w r5, r4
|
||||
bne 1b
|
||||
str r5, [r0]
|
||||
|
||||
pop {r4-r8, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,81 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_192_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 Jun 2018
|
||||
*/
|
||||
|
||||
// 8 rounds of rcon can be computed as left shift only
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// Expands a 192-bit AES key into the encryption round keys (Cortex-M7
// scheduling of the same algorithm as the CM3 variant).
//   r0 (rk)  - destination; 208 bytes are written in total
//   r1 (key) - source key, read once with LDM
// r2-r7 hold the current six key words, r1 holds rcon in its top byte and
// r14 points at AES_Te2 (used as a byte S-box). The last two words (r6, r7)
// of each group are stored at the top of the next iteration; the loop exits
// after the eighth iteration, when shifting rcon sets the carry flag.
// void CM7_1T_AES_192_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM7_1T_AES_192_keyschedule_enc
|
||||
.type CM7_1T_AES_192_keyschedule_enc,%function
|
||||
CM7_1T_AES_192_keyschedule_enc:
|
||||
#if __ARM_ARCH_7EM__
|
||||
push {r4-r10, lr}
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
//load key
|
||||
ldmia.w r1!, {r2-r7} // align loop entry
|
||||
|
||||
mov.w r1, #0x01000000 // calculate rcon in highest byte to use a carry flag
|
||||
|
||||
//just copy a key
|
||||
stmia.w r0!, {r2-r5} // align loop entry // r6, r7 stored at beginning of the loop
|
||||
|
||||
1: and.w r12, r7, #0xff
|
||||
lsr.w r10, r7, #24
|
||||
|
||||
strd r6,r7, [r0], #8
|
||||
|
||||
uxtb r9, r7, ror #16
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
uxtb r8, r7, ror #8
|
||||
ldrb r10, [r14, r10, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
ldrb r9, [r14, r9, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r12, lsl #24
|
||||
ldrb r8, [r14, r8, lsl #2] // load sbox from Te2
|
||||
|
||||
eor r2, r2, r10, lsl #16
|
||||
// the carry set here is consumed by the bcs below; the instructions in between do not update the flags
lsls.w r1, #1 // next rcon // can't .n even when epilogue aligned
|
||||
|
||||
orr r8, r8, r9, lsl #8 // lower 16 bits
|
||||
eor.w r3, r2 // start now, there is bubble anyway
|
||||
|
||||
eor.w r2, r8 // finish r2
|
||||
eor.w r3, r8
|
||||
|
||||
eor.w r4, r3
|
||||
str.w r2, [r0], #4
|
||||
|
||||
eor.w r5, r4
|
||||
str.w r3, [r0], #4
|
||||
|
||||
strd r4,r5, [r0], #8
|
||||
|
||||
bcs 2f
|
||||
eors r6, r5
|
||||
|
||||
eors r7, r6
|
||||
b 1b // can bcc here at +1 cycles
|
||||
|
||||
2:
|
||||
pop {r4-r10, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,113 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_256_keyschedule_enc.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 Jun 2018
|
||||
*/
|
||||
|
||||
// 7 rounds of rcon can be computed as left shift only
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// Expands an AES-256 key into the encryption round-key schedule.
//   r0 = rk  - output buffer, written sequentially with post-increment stores
//   r1 = key - input key; eight words are loaded from it, then r1 is reused as the rcon
// Key words 0-3 are stored before the loop; words 4-7 (r6-r9) are stored during the
// first half of each pass. The first half derives r2-r5 (byte-rotated S-box of r9,
// plus rcon); the second half derives r6-r9 (unrotated S-box of r5, no rcon).
// rcon lives in the top byte of r1; once the shift sets bit 31 (7th pass) the loop is
// left through "bmi" and the final r2-r5 are stored at label 2. Total: 60 words.
// S-box bytes are fetched with ldrb from AES_Te2 at a 4-byte stride.
// void CM7_1T_AES_256_keyschedule_enc(uint8_t *rk, const uint8_t *key) {
|
||||
.global CM7_1T_AES_256_keyschedule_enc
|
||||
.type CM7_1T_AES_256_keyschedule_enc,%function
|
||||
CM7_1T_AES_256_keyschedule_enc:
|
||||
#if __ARM_ARCH_7EM__
|
||||
// NOTE(review): nine registers are pushed, so sp is not 8-byte aligned inside this
// routine (assuming it was on entry); no calls are made from here
push {r4-r11, lr}
|
||||
|
||||
//load key
|
||||
ldmia.w r1!, {r2-r9}
|
||||
|
||||
strd r2,r3, [r0], #8 // just copy a key // 2x strd to keep loop aligned
|
||||
|
||||
movw r14, #:lower16:AES_Te2 // top loaded in loop
|
||||
mov.w r1, #0x01000000 // calculate rcon in highest byte to use negative flag
|
||||
|
||||
strd r4,r5, [r0], #8 // upper part is stored in first half of the loop
|
||||
|
||||
1: uxtb r10, r9, ror #8
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
and.w r11, r9, #0xff
|
||||
str.w r6, [r0], #4
|
||||
|
||||
uxtb.w r12, r9, ror #16
|
||||
ldrb r10, [r14, r10, lsl #2] // load sbox from Te2
|
||||
|
||||
eor.w r2, r2, r1, lsr #24 // rcon is in highest byte
|
||||
ldrb r11, [r14, r11, lsl #2] // load sbox from Te2
|
||||
|
||||
eor.w r2, r10
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
lsr.w r10, r9, #24 // early alu can be consumed by load next cycle
|
||||
str.w r7, [r0], #4
|
||||
|
||||
eor.w r2, r2, r11, lsl #24
|
||||
ldrb r10, [r14, r10, lsl #2] // load sbox from Te2
|
||||
|
||||
eor.w r2, r2, r12, lsl #8
|
||||
lsls r1, #1 // next rcon; the N flag set here is consumed by "bmi" below
|
||||
|
||||
eor.w r3, r2 // start now, there is bubble anyway
|
||||
str.w r8, [r0], #4
|
||||
|
||||
eor.w r3, r3, r10, lsl #16
|
||||
str.w r9, [r0], #4
|
||||
|
||||
eor.w r4, r3
|
||||
eor.w r2, r2, r10, lsl #16 // finish r2
|
||||
|
||||
eor.w r5, r4
|
||||
bmi 2f
|
||||
|
||||
uxtb r10, r5, ror #8
|
||||
and.w r11, r5, #0xff
|
||||
|
||||
uxtb.w r12, r5, ror #16
|
||||
str.w r2, [r0], #4
|
||||
|
||||
nop
|
||||
ldrb r11, [r14, r11, lsl #2] // load sbox from Te2
|
||||
|
||||
nop
|
||||
ldrb r10, [r14, r10, lsl #2] // load sbox from Te2
|
||||
|
||||
eor.w r6, r11
|
||||
ldrb r12, [r14, r12, lsl #2] // load sbox from Te2
|
||||
|
||||
lsr.w r11, r5, #24 // early alu can be consumed by load next cycle
|
||||
str.w r3, [r0], #4
|
||||
|
||||
eor.w r6, r6, r10, lsl #8
|
||||
ldrb r11, [r14, r11, lsl #2] // load sbox from Te2
|
||||
|
||||
eor.w r6, r6, r12, lsl #16
|
||||
str.w r4, [r0], #4
|
||||
|
||||
eors r7, r6 // start now, there is bubble anyway
|
||||
str.w r5, [r0], #4
|
||||
|
||||
eor.w r7, r7, r11, lsl #24
|
||||
nop
|
||||
|
||||
eor.w r8, r7
|
||||
eor.w r6, r6, r11, lsl #24 // finish r6
|
||||
|
||||
eor.w r9, r8
|
||||
b 1b
|
||||
|
||||
2:
|
||||
// last four words: r2-r5 were computed in the first half but not yet stored
stmia r0!, {r2-r5}
|
||||
pop {r4-r11, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,525 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_decrypt.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 jun 2018
|
||||
*/
|
||||
|
||||
// uses backward expanded round key
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// Decrypts one 16-byte block.
//   r0 = rk     - round-key buffer; read from its end (rk + 16 + rounds*16) down to rk
//   r1 = in     - 16 input bytes, loaded as four words
//   r2 = out    - saved on the stack at entry and reloaded just before the final store
//   r3 = rounds - number of rounds; fixes where the round-key walk starts
// Loop "1:" runs rounds-1 times using 32-bit AES_Td2 lookups combined with rotations;
// the last round uses byte lookups in AES_inv_sbox. r4-r11 are preserved.
// void CM7_1T_AES_decrypt(uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds) {
|
||||
.global CM7_1T_AES_decrypt
|
||||
.type CM7_1T_AES_decrypt,%function
|
||||
nop // align loop entry
|
||||
nop // align loop entry
|
||||
CM7_1T_AES_decrypt:
|
||||
#if __ARM_ARCH_7EM__
|
||||
//pld here at +1 cycle?
|
||||
push {r2, r4-r11,lr} // stack out
|
||||
|
||||
movw r14, #:lower16:AES_Td2
|
||||
add r11, r0, #16 // to compare against before final round
|
||||
|
||||
movt r14, #:upper16:AES_Td2
|
||||
add r12, r11, r3, lsl #4 // rk_end = rk+16 + rounds * 16
|
||||
|
||||
//load input
|
||||
ldmia.w r1!, {r4-r7}
|
||||
|
||||
//load key
|
||||
ldmdb r12!, {r0-r3}
|
||||
|
||||
//initial addroundkey in loop
|
||||
|
||||
// global allocation
|
||||
// r11 - rk + 16
|
||||
// r12 - current rk ptr
|
||||
// r14 - Td2
|
||||
|
||||
// r0 - s00 | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 | s31
|
||||
// r2 - s02 | s12 | s22 | s32
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
|
||||
// rotation after Td2
|
||||
// xor 16 | 8 | 0 | 24
|
||||
|
||||
// r4 - s00T ^ s13T ^ s22T ^ s31T
|
||||
// r5 - s01T ^ s10T ^ s23T ^ s32T
|
||||
// r6 - s02T ^ s11T ^ s20T ^ s33T
|
||||
// r7 - s03T ^ s12T ^ s21T ^ s30T
|
||||
|
||||
1: // skew addroundkey to beginning of loop
|
||||
eors r0, r4
|
||||
eors r1, r5
|
||||
|
||||
eors r2, r6
|
||||
eors r3, r7
|
||||
|
||||
// start from col 0
|
||||
uxtb.w r4, r2, ror #16
|
||||
lsrs r5, r1, #24
|
||||
|
||||
uxtb r6, r0
|
||||
ldr.w r10, [r14, r5, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 |
|
||||
// r2 - s02 | s12 | | s32
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
// r4 - s22 // c0
|
||||
// r5 -
|
||||
// r6 - s00 // c0 ror16
|
||||
// r7 -
|
||||
// r8 -
|
||||
// r9 -
|
||||
// r10 - s31T // c0 ror24
|
||||
|
||||
lsrs r5, r2, #24
|
||||
ldr.w r4, [r14, r4, lsl #2]
|
||||
|
||||
uxtb r7, r1
|
||||
ldr.w r9, [r14, r6, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
// r4 - s22T // c0
|
||||
// r5 - s32 // c1 ror24
|
||||
// r6 -
|
||||
// r7 - s01 // c1 ror16
|
||||
// r8 -
|
||||
// r9 - s00T // c0 ror16
|
||||
// r10 - s31T // c0 ror24
|
||||
|
||||
uxtb.w r6, r3, ror #8
|
||||
ldr.w r8, [r14, r5, lsl #2]
|
||||
|
||||
uxtb.w r5, r3, ror #16
|
||||
eor.w r4, r4, r10, ror #24
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - s22T ^ s31T // c0
|
||||
// r5 - s23 // c1
|
||||
// r6 - s13 // c0 ror8
|
||||
// r7 - s01 // c1 ror16
|
||||
// r8 - s32T // c1 ror24
|
||||
// r9 - s00T // c0 ror16
|
||||
// r10 -
|
||||
|
||||
eor.w r4, r4, r9, ror #16
|
||||
ldr.w r10, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r9, r0, ror #8
|
||||
ldr.w r5, [r14, r5, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | | s20 | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - s22T ^ s31T ^ s00T // c0
|
||||
// r5 - s23T // c1
|
||||
// r6 - s13 // c0 ror8
|
||||
// r7 -
|
||||
// r8 - s32T // c1 ror24
|
||||
// r9 - s10 // c1 ror8
|
||||
// r10 - s01T // c1 ror16
|
||||
|
||||
uxtb.w r7, r0, ror #16
|
||||
ldr.w r6, [r14, r6, lsl #2]
|
||||
|
||||
eor.w r5, r5, r10, ror #16
|
||||
ldr.w r9, [r14, r9, lsl #2]
|
||||
|
||||
// r0 - | | | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - s22T ^ s31T ^ s00T // c0
|
||||
// r5 - s23T ^ s01T // c1
|
||||
// r6 - s13T // c0 ror8
|
||||
// r7 - s20 // c2
|
||||
// r8 - s32T // c1 ror24
|
||||
// r9 - s10T // c1 ror8
|
||||
// r10 -
|
||||
|
||||
eor.w r4, r4, r6, ror #8
|
||||
ldr.w r10, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r7, r2, ror #8
|
||||
eor.w r5, r5, r8, ror #24
|
||||
|
||||
// r0 - | | | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - col 0
|
||||
// r5 - s23T ^ s01T ^ s32T // c1
|
||||
// r6 -
|
||||
// r7 - s12 // c3 ror8
|
||||
// r8 -
|
||||
// r9 - s10T // c1 ror8
|
||||
// r10 - s20T // c2
|
||||
|
||||
uxtb r2, r2
|
||||
eor.w r5, r5, r9, ror #8
|
||||
|
||||
lsrs r0, #24
|
||||
ldr.w r8, [r14, r7, lsl #2]
|
||||
|
||||
// r0 - s30 // c3 ror24
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 // c2 ror16
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 -
|
||||
// r7 -
|
||||
// r8 - s12T // c3 ror8
|
||||
// r9 -
|
||||
// r10 - s20T // c2
|
||||
|
||||
uxtb.w r9, r1, ror #16
|
||||
ldr.w r2, [r14, r2, lsl #2]
|
||||
|
||||
uxtb.w r1, r1, ror #8
|
||||
ldr.w r6, [r14, r0, lsl #2]
|
||||
|
||||
// r0 -
|
||||
// r1 - s11 // c2 ror8
|
||||
// r2 - s02T // c2 ror16
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s30T // c3 ror24
|
||||
// r7 -
|
||||
// r8 - s12T // c3 ror8
|
||||
// r9 - s21 // c3
|
||||
// r10 - s20T // c2
|
||||
|
||||
lsrs r0, r3, #24
|
||||
ldr.w r9, [r14, r9, lsl #2]
|
||||
|
||||
uxtb r7, r3
|
||||
eor.w r2, r10, r2, ror #16
|
||||
|
||||
// r0 - s33 // c2 ror24
|
||||
// r1 - s11 // c2 ror8
|
||||
// r2 - s20T ^ s02T // c2
|
||||
// r3 -
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s30T // c3 ror24
|
||||
// r7 - s03 // c3 ror16
|
||||
// r8 - s12T // c3 ror8
|
||||
// r9 - s21T // c3
|
||||
// r10 -
|
||||
|
||||
eor.w r9, r9, r6, ror #24
|
||||
ldr.w r10, [r14, r1, lsl #2]
|
||||
|
||||
eor.w r9, r9, r8, ror #8
|
||||
ldr.w r6, [r14, r0, lsl #2]
|
||||
|
||||
// step the round-key pointer down by 16 bytes and fetch the first two key words
ldrd r0,r1, [r12, #-16]!
|
||||
|
||||
// r0 - rk[0]
|
||||
// r1 - rk[1]
|
||||
// r2 - s20T ^ s02T // c2
|
||||
// r3 -
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s33T // c2 ror24
|
||||
// r7 - s03 // c3 ror16
|
||||
// r8 -
|
||||
// r9 - s21T ^ s30T ^ s12T // c3
|
||||
// r10 - s11T // c2 ror8
|
||||
|
||||
cmp r11, r12 // cmp early
|
||||
ldr.w r7, [r14, r7, lsl #2]
|
||||
|
||||
eor.w r10, r2, r10, ror #8
|
||||
ldr r2, [r12, #8]
|
||||
|
||||
eor.w r6, r10, r6, ror #24
|
||||
ldr r3, [r12, #12]
|
||||
|
||||
eor.w r7, r9, r7, ror #16
|
||||
bne 1b
|
||||
|
||||
eors r0, r4
|
||||
eors r1, r5
|
||||
|
||||
eors r2, r6
|
||||
eors r3, r7
|
||||
|
||||
// final round
|
||||
movw r14, #:lower16:AES_inv_sbox
|
||||
movt r14, #:upper16:AES_inv_sbox
|
||||
|
||||
// r0 - s00 | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 | s31
|
||||
// r2 - s02 | s12 | s22 | s32
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
|
||||
//final
|
||||
// - s00`| s13`| s22`| s31`
|
||||
// - s01`| s10`| s23`| s32`
|
||||
// - s02`| s11`| s20`| s33`
|
||||
// - s03`| s12`| s21`| s30`
|
||||
|
||||
// start from col 0
|
||||
uxtb r4, r0
|
||||
lsrs r7, r1, #24
|
||||
|
||||
// uxtab adds the extracted byte to the table base, giving a ready-to-use address
uxtab r6, r14, r2, ror #16
|
||||
ldrb r10, [r14, r7]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 |
|
||||
// r2 - s02 | s12 | | s32
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
// r4 - s00 //r0c0
|
||||
// r5 -
|
||||
// r6 - s22 //r2c0
|
||||
// r7 -
|
||||
// r8 -
|
||||
// r9 -
|
||||
// r10 - s31` //r3c0
|
||||
// r11 -
|
||||
|
||||
uxtab r7, r14, r3, ror #8
|
||||
ldrb r9, [r14, r4]
|
||||
|
||||
lsrs r5, r2, #24
|
||||
ldrb r6, [r6]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | s23 | s33
|
||||
// r4 -
|
||||
// r5 - s32 //r3c1
|
||||
// r6 - s22` //r2c0
|
||||
// r7 - s13 //r1c0
|
||||
// r8 -
|
||||
// r9 - s00` //r0c0
|
||||
// r10 - s31` //r3c0
|
||||
// r11 -
|
||||
|
||||
uxtab r4, r14, r3, ror #16
|
||||
ldrb r7, [r7]
|
||||
|
||||
orr.w r10, r6, r10, lsl #8
|
||||
ldrb r8, [r14, r5]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - s23 //r2c1
|
||||
// r5 -
|
||||
// r6 -
|
||||
// r7 - s13` //r1c0
|
||||
// r8 - s32` //r3c1
|
||||
// r9 - s00` //r0c0
|
||||
// r10 - s22` | s31` //c0 upper
|
||||
// r11 -
|
||||
|
||||
uxtab r5, r14, r0, ror #8
|
||||
ldrb r4, [r4]
|
||||
|
||||
uxtb r6, r1
|
||||
orr.w r9, r9, r7, lsl #8
|
||||
|
||||
//current allocation
|
||||
// r0 - | | s20 | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 - s23` //r2c1
|
||||
// r5 - s10 //r1c1
|
||||
// r6 - s01 //r0c1
|
||||
// r7 -
|
||||
// r8 - s32` //r3c1
|
||||
// r9 - s00` | s13` //c0 bottom
|
||||
// r10 - s22` | s31` //c0 upper
|
||||
// r11 -
|
||||
|
||||
orr.w r7, r9, r10, lsl #16
|
||||
ldrb r5, [r5]
|
||||
|
||||
orr.w r8, r4, r8, lsl #8
|
||||
ldrb r10, [r14, r6]
|
||||
|
||||
//current allocation
|
||||
// r0 - | | s20 | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 | | | s33
|
||||
// r4 -
|
||||
// r5 - s10` //r1c1
|
||||
// r6 -
|
||||
// r7 - col 0
|
||||
// r8 - s23` | s32` //c1 upper
|
||||
// r9 -
|
||||
// r10 - s01` //r0c1
|
||||
// r11 -
|
||||
|
||||
uxtab r4, r14, r0, ror #16
|
||||
lsrs r6, r3, #24
|
||||
|
||||
uxtb r3, r3
|
||||
orr.w r10, r10, r5, lsl #8
|
||||
|
||||
//current allocation
|
||||
// r0 - | | | s30
|
||||
// r1 - | s11 | s21 |
|
||||
// r2 - s02 | s12 | |
|
||||
// r3 - s03 //r0c3
|
||||
// r4 - s20 //r2c2
|
||||
// r5 -
|
||||
// r6 - s33 //r3c2
|
||||
// r7 - col 0
|
||||
// r8 - s23` | s32` //c1 upper
|
||||
// r9 -
|
||||
// r10 - s01` | s10` //c1 bottom
|
||||
// r11 -
|
||||
|
||||
uxtb r5, r2
|
||||
ldrb r9, [r14, r6]
|
||||
|
||||
uxtab r6, r14, r1, ror #8
|
||||
ldrb r4, [r4]
|
||||
|
||||
//current allocation
|
||||
// r0 - | | | s30
|
||||
// r1 - | | s21 |
|
||||
// r2 - | s12 | |
|
||||
// r3 - s03 //r0c3
|
||||
// r4 - s20` //r2c2
|
||||
// r5 - s02 //r0c2
|
||||
// r6 - s11 //r1c2
|
||||
// r7 - col 0
|
||||
// r8 - s23` | s32` //c1 upper
|
||||
// r9 - s33` //r3c2
|
||||
// r10 - s01` | s10` //c1 bottom
|
||||
// r11 -
|
||||
|
||||
lsrs r0, #24
|
||||
ldrb r5, [r14, r5]
|
||||
|
||||
uxtab r1, r14, r1, ror #16
|
||||
ldrb r6, [r6]
|
||||
|
||||
//current allocation
|
||||
// r0 - s30 //r3c3
|
||||
// r1 - s21 //r2c3
|
||||
// r2 - | s12 | |
|
||||
// r3 - s03 //r0c3
|
||||
// r4 - s20` //r2c2
|
||||
// r5 - s02` //r0c2
|
||||
// r6 - s11` //r1c2
|
||||
// r7 - col 0
|
||||
// r8 - s23` | s32` //c1 upper
|
||||
// r9 - s33` //r3c2
|
||||
// r10 - s01` | s10` //c1 bottom
|
||||
// r11 -
|
||||
|
||||
uxtab r2, r14, r2, ror #8
|
||||
ldrb.w r0, [r14, r0]
|
||||
|
||||
orr.w r9, r4, r9, lsl #8
|
||||
ldrb r1, [r1]
|
||||
|
||||
//current allocation
|
||||
// r0 - s30` //r3c3
|
||||
// r1 - s21` //r2c3
|
||||
// r2 - s12 //r1c3
|
||||
// r3 - s03 //r0c3
|
||||
// r4 -
|
||||
// r5 - s02` //r0c2
|
||||
// r6 - s11` //r1c2
|
||||
// r7 - col 0
|
||||
// r8 - s23` | s32` //c1 upper
|
||||
// r9 - s20` | s33` //c2 upper
|
||||
// r10 - s01` | s10` //c1 bottom
|
||||
// r11 -
|
||||
|
||||
orr.w r11, r5, r6, lsl #8
|
||||
ldrb r2, [r2]
|
||||
|
||||
orr.w r6, r10, r8, lsl #16
|
||||
ldrb r3, [r14, r3]
|
||||
|
||||
//current allocation
|
||||
// r0 - s30` //r3c3
|
||||
// r1 - s21` //r2c3
|
||||
// r2 - s12` //r1c3
|
||||
// r3 - s03` //r0c3
|
||||
// r4 -
|
||||
// r5 -
|
||||
// r6 - col 1
|
||||
// r7 - col 0
|
||||
// r8 -
|
||||
// r9 - s20` | s33` //c2 upper
|
||||
// r10 -
|
||||
// r11 - s02` | s11` //c2 bottom
|
||||
|
||||
orr.w r10, r1, r0, lsl #8
|
||||
ldr r4, [sp, #4] // pop early to pop even number of registers later
|
||||
|
||||
orr.w r8, r3, r2, lsl #8
|
||||
ldr r5, [sp], #8 // load output pointer and clear stack
|
||||
|
||||
// r12 == rk + 16 here, so this reads the 16 bytes at rk (last key applied)
ldmdb r12, {r0-r3}
|
||||
|
||||
//current allocation
|
||||
// r0 - rk[0]
|
||||
// r1 - rk[1]
|
||||
// r2 - rk[2]
|
||||
// r3 - rk[3]
|
||||
// r4 - unstacked
|
||||
// r5 - out p
|
||||
// r6 - col 1
|
||||
// r7 - col 0
|
||||
// r8 - s03` | s12` //c3 bottom
|
||||
// r9 - s20` | s33` //c2 upper
|
||||
// r10 - s21` | s30` //c3 upper
|
||||
// r11 - s02` | s11` //c2 bottom
|
||||
|
||||
eors r0, r7
|
||||
orr.w r7, r11, r9, lsl #16
|
||||
|
||||
eors r1, r6
|
||||
orr.w r6, r8, r10, lsl #16
|
||||
|
||||
eors r2, r7
|
||||
eors r3, r6
|
||||
|
||||
stmia r5!, {r0-r3}
|
||||
|
||||
pop {r5-r11, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,498 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_encrypt.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 jun 2018
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// Encrypts one 16-byte block.
//   r0 = rk     - round-key buffer; read upward from rk, 16 bytes per round
//   r1 = in     - 16 input bytes, loaded as four words
//   r2 = out    - saved on the stack at entry and reloaded just before the final store
//   r3 = rounds - number of rounds; r11 = rk + rounds*16 marks the last round key
// Loop "1:" runs rounds-1 times using 32-bit AES_Te2 lookups combined with rotations;
// the last round reads single bytes from AES_Te2 (ldrb at a 4-byte stride).
// r4-r11 are preserved.
// void CM7_1T_AES_encrypt(uint8_t* rk, const uint8_t* in, uint8_t* out, size_t rounds) {
|
||||
.global CM7_1T_AES_encrypt
|
||||
.type CM7_1T_AES_encrypt,%function
|
||||
CM7_1T_AES_encrypt:
|
||||
#if __ARM_ARCH_7EM__
|
||||
//pld here at +1 cycle?
|
||||
push {r2,r4-r11,lr} //stack out
|
||||
|
||||
mov r12, r0
|
||||
add r11, r0, r3, lsl #4 // rk_end-16 = rk + rounds * 16
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
|
||||
//load input
|
||||
ldmia r1!, {r4-r7}
|
||||
|
||||
//load key
|
||||
ldmia r12!, {r0-r3}
|
||||
|
||||
//initial addroundkey in loop
|
||||
|
||||
// global allocation
|
||||
// r11 - final ptr
|
||||
// r12 - current rk ptr
|
||||
// r14 - Te2
|
||||
|
||||
// r0 - s00 | s10 | s20 | s30
|
||||
// r1 - s01 | s11 | s21 | s31
|
||||
// r2 - s02 | s12 | s22 | s32
|
||||
// r3 - s03 | s13 | s23 | s33
|
||||
|
||||
// rotation after Te2
|
||||
// xor 16 | 8 | 0 | 24
|
||||
|
||||
// r4 - s00T ^ s11T ^ s22T ^ s33T
|
||||
// r5 - s01T ^ s12T ^ s23T ^ s30T
|
||||
// r6 - s02T ^ s13T ^ s20T ^ s31T
|
||||
// r7 - s03T ^ s10T ^ s21T ^ s32T
|
||||
|
||||
1: // skew addroundkey to beginning of loop
|
||||
eors r0, r4
|
||||
eors r1, r5
|
||||
|
||||
eors r2, r6
|
||||
eors r3, r7
|
||||
|
||||
// start from col 1
|
||||
uxtb.w r5, r3, ror #16
|
||||
lsrs r4, r0, #24
|
||||
|
||||
uxtb r6, r1
|
||||
ldr.w r10, [r14, r4, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - s00 | s10 | s20 |
|
||||
// r1 - | s11 | s21 | s31
|
||||
// r2 - s02 | s12 | s22 | s32
|
||||
// r3 - s03 | s13 | | s33
|
||||
// r4 -
|
||||
// r5 - s23 // c1
|
||||
// r6 - s01 // c1 ror16
|
||||
// r7 -
|
||||
// r8 -
|
||||
// r9 -
|
||||
// r10 - s30T // c1 ror24
|
||||
|
||||
lsrs r4, r3, #24
|
||||
ldr.w r5, [r14, r5, lsl #2]
|
||||
|
||||
uxtb r7, r0
|
||||
ldr.w r9, [r14, r6, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 |
|
||||
// r1 - | s11 | s21 | s31
|
||||
// r2 - s02 | s12 | s22 | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s33 // c0 ror24
|
||||
// r5 - s23T // c1
|
||||
// r6 -
|
||||
// r7 - s00 // c0 ror16
|
||||
// r8 -
|
||||
// r9 - s01T // c1 ror16
|
||||
// r10 - s30T // c1 ror24
|
||||
|
||||
uxtb r6, r2, ror #8
|
||||
ldr.w r8, [r14, r4, lsl #2]
|
||||
|
||||
uxtb r4, r2, ror #16
|
||||
eor.w r5, r5, r10, ror #24
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 |
|
||||
// r1 - | s11 | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s22 // c0
|
||||
// r5 - s23T ^ s30T // c1
|
||||
// r6 - s12 // c1 ror8
|
||||
// r7 - s00 // c0 ror16
|
||||
// r8 - s33T // c0 ror24
|
||||
// r9 - s01T // c1 ror16
|
||||
// r10 -
|
||||
|
||||
eor.w r5, r5, r9, ror #16
|
||||
ldr.w r10, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r9, r1, ror #8
|
||||
ldr.w r4, [r14, r4, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 |
|
||||
// r1 - | | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s22T // c0
|
||||
// r5 - s23T ^ s30T ^ s01T // c1
|
||||
// r6 - s12 // c1 ror8
|
||||
// r7 -
|
||||
// r8 - s33T // c0 ror24
|
||||
// r9 - s11 // c0 ror8
|
||||
// r10 - s00T // c0 ror16
|
||||
|
||||
uxtb.w r7, r0, ror #16
|
||||
ldr.w r6, [r14, r6, lsl #2]
|
||||
|
||||
eor.w r4, r4, r10, ror #16
|
||||
ldr.w r9, [r14, r9, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | |
|
||||
// r1 - | | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s22T ^ s00T // c0
|
||||
// r5 - s23T ^ s30T ^ s01T // c1
|
||||
// r6 - s12T // c1 ror8
|
||||
// r7 - s20 // c2
|
||||
// r8 - s33T // c0 ror24
|
||||
// r9 - s11T // c0 ror8
|
||||
// r10 -
|
||||
|
||||
eor.w r5, r5, r6, ror #8
|
||||
ldr.w r10, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r0, r0, ror #8
|
||||
eor.w r4, r4, r8, ror #24
|
||||
|
||||
//current allocation
|
||||
// r0 - s10 // c3 ror8
|
||||
// r1 - | | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s22T ^ s00T ^ s33T // c0
|
||||
// r5 - col 1
|
||||
// r6 -
|
||||
// r7 -
|
||||
// r8 -
|
||||
// r9 - s11T // c0 ror8
|
||||
// r10 - s20T // c2
|
||||
|
||||
uxtb r7, r2
|
||||
eor.w r4, r4, r9, ror #8
|
||||
|
||||
lsrs r6, r1, #24
|
||||
ldr.w r9, [r14, r0, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 -
|
||||
// r1 - | | s21 |
|
||||
// r2 - | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s31 // c2 ror24
|
||||
// r7 - s02 // c2 ror16
|
||||
// r8 -
|
||||
// r9 - s10T // c3 ror8
|
||||
// r10 - s20T // c2
|
||||
|
||||
uxtb.w r8, r3, ror #8
|
||||
ldr.w r7, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r1, r1, ror #16
|
||||
ldr.w r6, [r14, r6, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 -
|
||||
// r1 - s21 // c3
|
||||
// r2 - | | | s32
|
||||
// r3 - s03 | | |
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s31T // c2 ror24
|
||||
// r7 - s02T // c2 ror16
|
||||
// r8 - s13 // c2 ror8
|
||||
// r9 - s10T // c3 ror8
|
||||
// r10 - s20T // c2
|
||||
|
||||
lsrs r2, #24
|
||||
ldr.w r8, [r14, r8, lsl #2]
|
||||
|
||||
uxtb r3, r3
|
||||
eor.w r6, r10, r6, ror #24
|
||||
|
||||
//current allocation
|
||||
// r0 -
|
||||
// r1 - s21 // c3
|
||||
// r2 - s32 // c3 ror24
|
||||
// r3 - s03 // c3 ror16
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - s20T ^ s31T // c2
|
||||
// r7 - s02T // c2 ror16
|
||||
// r8 - s13T // c2 ror8
|
||||
// r9 - s10T // c3 ror8
|
||||
// r10 -
|
||||
|
||||
eor.w r6, r6, r7, ror #16
|
||||
ldr.w r10, [r14, r1, lsl #2]
|
||||
|
||||
eor.w r6, r6, r8, ror #8
|
||||
ldr.w r7, [r14, r2, lsl #2]
|
||||
|
||||
// fetch the first two words of the next round key and advance the pointer by 16
ldrd r0,r1, [r12], #16
|
||||
|
||||
//current allocation
|
||||
// r0 - rk[0]
|
||||
// r1 - rk[1]
|
||||
// r2 -
|
||||
// r3 - s03 // c3 ror16
|
||||
// r4 - col 0
|
||||
// r5 - col 1
|
||||
// r6 - col 2
|
||||
// r7 - s32T // c3 ror24
|
||||
// r8 -
|
||||
// r9 - s10T // c3 ror8
|
||||
// r10 - s21T // c3
|
||||
|
||||
cmp r11, r12 // cmp early
|
||||
ldr.w r8, [r14, r3, lsl #2]
|
||||
|
||||
eor.w r7, r10, r7, ror #24
|
||||
ldr r2, [r12, #-8]
|
||||
|
||||
eor.w r7, r7, r9, ror #8
|
||||
ldr r3, [r12, #-4]
|
||||
|
||||
eor.w r7, r7, r8, ror #16
|
||||
bne 1b
|
||||
|
||||
eors r0, r4
|
||||
eors r1, r5
|
||||
|
||||
eors r2, r6
|
||||
eors r3, r7
|
||||
|
||||
// final round
|
||||
uxtb r6, r3, ror #16 // row 2 col 1
|
||||
lsrs.w r7, r0, #24 // row 3 col 1 // cannot .n
|
||||
|
||||
uxtb r5, r2, ror #8 // row 1 col 1
|
||||
ldrb.w r8, [r14, r7, lsl #2]
|
||||
|
||||
uxtb.w r4, r1 // row 0 col 1 // cannot .n
|
||||
ldrb.w r6, [r14, r6, lsl #2]
|
||||
|
||||
lsrs.w r7, r3, #24 // row 3 col 0 // cannot .n
|
||||
ldrb.w r5, [r14, r5, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - s00 | s10 | s20 |
|
||||
// r1 - | s11 | s21 | s31
|
||||
// r2 - s02 | | s22 | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s01 //r0c1
|
||||
// r5 - s12` //r1c1
|
||||
// r6 - s23` //r2c1
|
||||
// r7 - s33 //r3c0
|
||||
// r8 - s30` //r3c1
|
||||
// r9 -
|
||||
// r10 -
|
||||
// r11 -
|
||||
|
||||
orr.w r11, r6, r8, lsl #8 // col 1 upper part
|
||||
ldrb.w r9, [r14, r4, lsl #2]
|
||||
|
||||
uxtb r6, r2, ror #16 // row 2 col 0
|
||||
ldrb.w r8, [r14, r7, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - s00 | s10 | s20 |
|
||||
// r1 - | s11 | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 -
|
||||
// r5 - s12` //r1c1
|
||||
// r6 - s22 //r2c0
|
||||
// r7 -
|
||||
// r8 - s33` //r3c0
|
||||
// r9 - s01` //r0c1
|
||||
// r10 -
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
uxtb.w r7, r0 // row 0 col 0 // cannot .n
|
||||
orr.w r10, r9, r5, lsl #8 // col 1 bottom part
|
||||
|
||||
uxtb r4, r1, ror #8 // row 1 col 0
|
||||
ldrb.w r6, [r14, r6, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | s20 |
|
||||
// r1 - | | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s11 //r1c0
|
||||
// r5 -
|
||||
// r6 - s22` //r2c0
|
||||
// r7 - s00 //r0c0
|
||||
// r8 - s33` //r3c0
|
||||
// r9 -
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
uxtb r5, r0, ror #16 // row 2 col 2
|
||||
ldrb.w r9, [r14, r7, lsl #2]
|
||||
|
||||
orr.w r8, r6, r8, lsl #8 // col 0 upper part
|
||||
ldrb.w r4, [r14, r4, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | |
|
||||
// r1 - | | s21 | s31
|
||||
// r2 - s02 | | | s32
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s11` //r1c0
|
||||
// r5 - s20 //r2c2
|
||||
// r6 -
|
||||
// r7 -
|
||||
// r8 - s22` | s33` //c0 upper
|
||||
// r9 - s00` //r0c0
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
uxtb r6, r2 // row 0 col 2
|
||||
lsrs r7, r1, #24 // row 3 col 2
|
||||
|
||||
lsrs r2, r2, #24 // row 3 col 3
|
||||
ldrb.w r7, [r14, r7, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | |
|
||||
// r1 - | | s21 |
|
||||
// r2 - s32 //r3c3
|
||||
// r3 - s03 | s13 | |
|
||||
// r4 - s11` //r1c0
|
||||
// r5 - s20 //r2c2
|
||||
// r6 - s02 //r0c2
|
||||
// r7 - s31` //r3c2
|
||||
// r8 - s22` | s33` //c0 upper
|
||||
// r9 - s00` //r0c0
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
orr.w r9, r9, r4, lsl #8 // col 0 bottom part
|
||||
ldrb.w r5, [r14, r5, lsl #2]
|
||||
|
||||
uxtb r4, r3, ror #8 // row 1 col 2
|
||||
ldrb.w r6, [r14, r6, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | |
|
||||
// r1 - | | s21 |
|
||||
// r2 - s32 //r3c3
|
||||
// r3 - s03 | | |
|
||||
// r4 - s13 //r1c2
|
||||
// r5 - s20` //r2c2
|
||||
// r6 - s02` //r0c2
|
||||
// r7 - s31` //r3c2
|
||||
// r8 - s22` | s33` //c0 upper
|
||||
// r9 - s00` | s11` //c0 bottom
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
uxtb r3, r3 // row 0 col 3
|
||||
orr.w r7, r5, r7, lsl #8 // col 2 upper part
|
||||
|
||||
uxtb r1, r1, ror #16 // row 2 col 3
|
||||
ldrb.w r4, [r14, r4, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - | s10 | |
|
||||
// r1 - s21 //r2c3
|
||||
// r2 - s32 //r3c3
|
||||
// r3 - s03 //r0c3
|
||||
// r4 - s13` //r1c2
|
||||
// r5 -
|
||||
// r6 - s02` //r0c2
|
||||
// r7 - s20` | s31` //c2 upper
|
||||
// r8 - s22` | s33` //c0 upper
|
||||
// r9 - s00` | s11` //c0 bottom
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
uxtb r0, r0, ror #8 // row 1 col 3
|
||||
ldrb.w r2, [r14, r2, lsl #2]
|
||||
|
||||
orr.w r5, r9, r8, lsl #16 // col 0
|
||||
ldrb.w r1, [r14, r1, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - s10 //r1c3
|
||||
// r1 - s21` //r2c3
|
||||
// r2 - s32` //r3c3
|
||||
// r3 - s03 //r0c3
|
||||
// r4 - s13` //r1c2
|
||||
// r5 - col 0
|
||||
// r6 - s02` //r0c2
|
||||
// r7 - s20` | s31` //c2 upper
|
||||
// r8 -
|
||||
// r9 -
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
orr.w r4, r6, r4, lsl #8 // c2 bottom
|
||||
ldrb.w r0, [r14, r0, lsl #2]
|
||||
|
||||
orr.w r8, r1, r2, lsl #8 // col 3 upper part
|
||||
ldrb.w r3, [r14, r3, lsl #2]
|
||||
|
||||
//current allocation
|
||||
// r0 - s10` //r1c3
|
||||
// r1 -
|
||||
// r2 -
|
||||
// r3 - s03` //r0c3
|
||||
// r4 - s02` | s13` //c2 bottom
|
||||
// r5 - col 0
|
||||
// r6 -
|
||||
// r7 - s20` | s31` //c2 upper
|
||||
// r8 - s21` | s32` //c3 upper
|
||||
// r9 -
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
orr.w r7, r4, r7, lsl #16 // col 2
|
||||
ldr r4, [sp, #4] // pop early to pop even number of registers
|
||||
|
||||
orr.w r9, r3, r0, lsl #8 // col 3 bottom part
|
||||
ldr r6, [sp], #8 // load output pointer and clear stack
|
||||
|
||||
// r12 == rk + rounds*16 here, i.e. the last round key
ldm r12, {r0-r3}
|
||||
|
||||
//current allocation
|
||||
// r0 - rk[0]
|
||||
// r1 - rk[1]
|
||||
// r2 - rk[2]
|
||||
// r3 - rk[3]
|
||||
// r4 - unstacked
|
||||
// r5 - col 0
|
||||
// r6 - out p
|
||||
// r7 - col 2
|
||||
// r8 - s21` | s32` //c3 upper
|
||||
// r9 - s03` | s10` //c3 bottom
|
||||
// r10 - s01` | s12` //c1 bottom
|
||||
// r11 - s23` | s30` //c1 upper
|
||||
|
||||
eors r0, r5
|
||||
orr.w r5, r10, r11, lsl #16 // col 1
|
||||
|
||||
eors r1, r5
|
||||
orr.w r5, r9, r8, lsl #16 // col 3
|
||||
|
||||
eors r2, r7
|
||||
eors r3, r5
|
||||
|
||||
stmia r6!, {r0-r3}
|
||||
|
||||
pop {r5-r11,pc}
|
||||
#else
|
||||
b . //crash in case the function was called on thumb1 core
|
||||
#endif
|
|
@ -0,0 +1,120 @@
|
|||
/*!
|
||||
* \file CM7_1T_AES_keyschedule_dec.S
|
||||
* \brief
|
||||
*
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 17 jun 2018
|
||||
*/
|
||||
|
||||
// performs the equivalent inverse cipher transformation on the expanded encryption key
|
||||
// order of round keys is not inverted - decryption will read it in reverse
|
||||
|
||||
.syntax unified
|
||||
.thumb
|
||||
.text
|
||||
.section .ramfunc.$SRAM_ITC
|
||||
|
||||
.align 3
|
||||
// void CM7_1T_AES_keyschedule_dec(uint8_t* rk, size_t rounds) {
|
||||
.global CM7_1T_AES_keyschedule_dec
|
||||
.type CM7_1T_AES_keyschedule_dec,%function
|
||||
CM7_1T_AES_keyschedule_dec:
|
||||
#if __ARM_ARCH_7EM__
|
||||
push {r4-r10, lr}
|
||||
|
||||
//first and last block are ommited
|
||||
add r10, r0, #16
|
||||
add r0, r0, r1, lsl #4 //rk_end-16 = rk + rounds * 16
|
||||
|
||||
movw r14, #:lower16:AES_Te2
|
||||
movt r14, #:upper16:AES_Te2
|
||||
movw r12, #:lower16:AES_Td2
|
||||
movt r12, #:upper16:AES_Td2
|
||||
|
||||
nop // align loop entry, wasted cycle
|
||||
b 2f // skip skewed part
|
||||
|
||||
//expand 2 columns in parallel to avoid stalls
|
||||
//expand backwards for better code density, redoing to forward needs care for stalls due to .w/.n instr combinations
|
||||
|
||||
//Td[sbox[]] - 4 regs, cannot be dual issued with itself or other block
|
||||
//dsp invMixColums - 4+1 regs + few more insns, cannot be dual issued with itself or other block
|
||||
// r2 = S{1}
|
||||
// r5 = S{2}
|
||||
// r6 = S{4}
|
||||
// r7 = S{8}
|
||||
// r9 = tmp
|
||||
|
||||
// r2 - S{9} = S{8} ^ S{1}
|
||||
// r5 - S{b} = S{9} ^ S{2}
|
||||
// r6 - S{d} = S{9} ^ S{4}
|
||||
// r7 - S{e} = S{8} ^ S{4} ^ S{2}
|
||||
|
||||
// r2 = s0{e}^s1{b}^s2{d}^s3{9} | s1{e}^s2{b}^s3{d}^s0{9} | s2{e}^s3{b}^s0{d}^s1{9} | s3{e}^s0{b}^s1{d}^s2{9}
|
||||
|
||||
1: eor r2, r7, r6, ror #16
|
||||
str.w r2, [r0, #4] // cant .n due to skip
|
||||
|
||||
2: ldrd r1,r2, [r0, #-8]!
|
||||
|
||||
uadd8 r5, r2, r2 // quad lsl #1
|
||||
and.w r3, r1, #0xff
|
||||
|
||||
uxtb r8, r1, ror #16
|
||||
eor r9, r5, #0x1b1b1b1b
|
||||
|
||||
sel r5, r9, r5 // S{2} // if uadd carried then take reduced byte
|
||||
ldrb r3, [r14, r3, lsl #2]
|
||||
|
||||
uadd8 r6, r5, r5 // quad lsl #1
|
||||
ldrb r8, [r14, r8, lsl #2]
|
||||
|
||||
uxtb r4, r1, ror #8
|
||||
eor r9, r6, #0x1b1b1b1b
|
||||
|
||||
sel r6, r9, r6 // S{4}
|
||||
ldr r3, [r12, r3, lsl #2]
|
||||
|
||||
uadd8 r7, r6, r6 // quad lsl #1
|
||||
ldrb r4, [r14, r4, lsl #2]
|
||||
|
||||
eor r9, r7, #0x1b1b1b1b
|
||||
ldr r8, [r12, r8, lsl #2]
|
||||
|
||||
sel r7, r9, r7 // S{8}
|
||||
lsr.w r1, r1, #24 // .n adds 1 loop invariant cycle
|
||||
|
||||
eor.w r8, r8, r3, ror #16
|
||||
ldr r4, [r12, r4, lsl #2]
|
||||
|
||||
eors r2, r7 // S{9}
|
||||
ldrb r1, [r14, r1, lsl #2]
|
||||
|
||||
eors r7, r5
|
||||
eor.w r8, r8, r4, ror #8
|
||||
|
||||
eors r7, r6 // S{e}
|
||||
cmp r10, r0
|
||||
|
||||
eor.w r7, r7, r2, ror #24
|
||||
ldr r1, [r12, r1, lsl #2]
|
||||
|
||||
eor.w r5, r2 // S{b}
|
||||
eor.w r6, r2 // S{d}
|
||||
|
||||
eor.w r1, r8, r1, ror #24
|
||||
str r1, [r0]
|
||||
|
||||
eor r7, r7, r5, ror #8
|
||||
bne 1b
|
||||
|
||||
//skewed also at beginning of the loop
|
||||
eor r2, r7, r6, ror #16
|
||||
str.w r2, [r0, #4] // if everything is .w, +1 cycle here
|
||||
|
||||
pop {r4-r10, pc}
|
||||
#else
|
||||
b . //crash in case the function was called on non dsp core
|
||||
#endif
|
|
@ -0,0 +1,217 @@
|
|||
/*!
|
||||
* \file AES_lookup_tables.c
|
||||
* \brief lookup tables used in some implementations
|
||||
*
|
||||
* If something is unused it will not waste memory.
|
||||
*
|
||||
* Alignment is required to avoid crossing 2 different memory busses. (minimum AHB granularity for bus slave in cortex-m is 1kB)
|
||||
*
|
||||
* To avoid data dependent load time differences, those tables have to be placed in deterministic memory section. (usually TCM/SRAM)
|
||||
* You might want to create dedicated section in linker script for those, to make sure that the correct memory block is used.
|
||||
*
|
||||
* `const` specifier cannot be used since it will move tables to flash memory that is not only non-deterministic, but it also
|
||||
* defeats the main purpose of using large lookup tables. (It also causes errors when trying to put them in SRAM by section attribute anyway.)
|
||||
*
|
||||
* \todo runtime gen at startup instead of storage
|
||||
*
|
||||
* \author Jan Oleksiewicz <jnk0le@hotmail.com>
|
||||
* \license SPDX-License-Identifier: MIT
|
||||
* \date 12 Jun 2018
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
|
||||
Use section attribute to put tables in a designated deterministic section (.data is used by default)
|
||||
I recommend using ".section.XXX" naming to let the linker do proper GC and reordering.
|
||||
|
||||
section(".AES_TABLES.sbox")
|
||||
section(".AES_TABLES.inv_sbox")
|
||||
section(".AES_TABLES.Te2")
|
||||
section(".AES_TABLES.Td2")
|
||||
|
||||
If .data section is already in DTCM and you just want to make sure it is as explicit as possible
|
||||
|
||||
.data : ALIGN(4) {
|
||||
PROVIDE(__data_start__ = .);
|
||||
*(.AES_TABLES .AES_TABLES.*)
|
||||
*(.data .data.* .gnu.linkonce.d.*)
|
||||
PROVIDE(__data_end__ = .);
|
||||
} > DTCM AT > FLASH
|
||||
|
||||
If .data section is not placed in deterministic memory block, then you have to create another output section:
|
||||
|
||||
.AES_TABLES : ALIGN(4) {
|
||||
PROVIDE(__aes_tables_start__ = .);
|
||||
*(.AES_TABLES .AES_TABLES.*)
|
||||
PROVIDE(__aes_tables_end__ = .);
|
||||
} > DTCM AT > FLASH
|
||||
|
||||
PROVIDE(__aes_tables_init_start__ = LOADADDR(.AES_TABLES));
|
||||
|
||||
and initialize it somewhere at startup:
|
||||
|
||||
extern size_t __aes_tables_init_start__;
|
||||
extern size_t __aes_tables_start__;
|
||||
extern size_t __aes_tables_end__;
|
||||
|
||||
for(int i = 0; i < (&__aes_tables_end__ - &__aes_tables_start__); i++) {
|
||||
(&__aes_tables_start__)[i] = (&__aes_tables_init_start__)[i]; // copy by 4 bytes
|
||||
}
|
||||
*/
|
||||
|
||||
uint8_t AES_inv_sbox[256] __attribute__((aligned(256), section(".data.AES_inv_sbox"))) =
|
||||
{
|
||||
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
|
||||
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
|
||||
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
|
||||
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
|
||||
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
|
||||
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
|
||||
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
|
||||
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
|
||||
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
|
||||
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
|
||||
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
|
||||
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
|
||||
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
|
||||
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
|
||||
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
|
||||
};
|
||||
|
||||
uint32_t AES_Te2[256] __attribute__((aligned(1024), section(".data.AES_Te2"))) =
|
||||
{
|
||||
0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b,
|
||||
0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
|
||||
0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
|
||||
0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
|
||||
0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d,
|
||||
0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
|
||||
0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf,
|
||||
0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
|
||||
0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
|
||||
0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
|
||||
0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1,
|
||||
0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
|
||||
0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3,
|
||||
0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
|
||||
0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
|
||||
0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
|
||||
0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a,
|
||||
0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
|
||||
0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3,
|
||||
0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
|
||||
0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
|
||||
0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
|
||||
0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39,
|
||||
0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
|
||||
0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb,
|
||||
0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
|
||||
0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
|
||||
0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
|
||||
0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f,
|
||||
0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
|
||||
0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321,
|
||||
0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
|
||||
0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
|
||||
0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
|
||||
0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d,
|
||||
0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
|
||||
0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc,
|
||||
0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
|
||||
0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
|
||||
0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
|
||||
0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a,
|
||||
0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
|
||||
0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662,
|
||||
0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
|
||||
0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
|
||||
0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
|
||||
0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea,
|
||||
0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
|
||||
0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e,
|
||||
0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
|
||||
0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
|
||||
0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
|
||||
0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66,
|
||||
0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
|
||||
0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9,
|
||||
0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
|
||||
0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
|
||||
0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
|
||||
0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9,
|
||||
0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
|
||||
0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d,
|
||||
0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
|
||||
0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
|
||||
0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16,
|
||||
};
|
||||
|
||||
uint32_t AES_Td2[256] __attribute__((aligned(1024), section(".data.AES_Td2"))) =
|
||||
{
|
||||
0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e,
|
||||
0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303,
|
||||
0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c,
|
||||
0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3,
|
||||
0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0,
|
||||
0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
|
||||
0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59,
|
||||
0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8,
|
||||
0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71,
|
||||
0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a,
|
||||
0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f,
|
||||
0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
|
||||
0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8,
|
||||
0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab,
|
||||
0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508,
|
||||
0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82,
|
||||
0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2,
|
||||
0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
|
||||
0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb,
|
||||
0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110,
|
||||
0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd,
|
||||
0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15,
|
||||
0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e,
|
||||
0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
|
||||
0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000,
|
||||
0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72,
|
||||
0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739,
|
||||
0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e,
|
||||
0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91,
|
||||
0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
|
||||
0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17,
|
||||
0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9,
|
||||
0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60,
|
||||
0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e,
|
||||
0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1,
|
||||
0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
|
||||
0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1,
|
||||
0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3,
|
||||
0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264,
|
||||
0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90,
|
||||
0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b,
|
||||
0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
|
||||
0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246,
|
||||
0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af,
|
||||
0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312,
|
||||
0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb,
|
||||
0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a,
|
||||
0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
|
||||
0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c,
|
||||
0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066,
|
||||
0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8,
|
||||
0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6,
|
||||
0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04,
|
||||
0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
|
||||
0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41,
|
||||
0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347,
|
||||
0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c,
|
||||
0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1,
|
||||
0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37,
|
||||
0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
|
||||
0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40,
|
||||
0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195,
|
||||
0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1,
|
||||
0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257,
|
||||
};
|
|
@ -76,8 +76,7 @@ uint16_t SecureChannel_Open(SecureChannel* sc, SmartCard* card, APDU* apdu, Pair
|
|||
sha512_Update(&sha512, apduData, SHA256_DIGEST_LENGTH);
|
||||
sha512_Final(&sha512, sc->encKey);
|
||||
|
||||
aes_import_param(sc->encKey, sc->encKey, (AES_256_KEY_SIZE << 1));
|
||||
aes_import_param(sc->iv, &apduData[SHA256_DIGEST_LENGTH], AES_IV_SIZE);
|
||||
memcpy(sc->iv, &apduData[SHA256_DIGEST_LENGTH], AES_IV_SIZE);
|
||||
sc->open = 1;
|
||||
|
||||
memset(secret, 0, SECP256K1_PUBLEN);
|
||||
|
@ -88,7 +87,7 @@ uint16_t SecureChannel_Protect_APDU(SecureChannel *sc, APDU* apdu, uint8_t* data
|
|||
len = pad_iso9797_m1(data, SC_PAD, len);
|
||||
uint8_t* apduData = APDU_DATA(apdu);
|
||||
|
||||
if (!aes_encrypt(sc->encKey, sc->iv, data, len, &apduData[AES_IV_SIZE])) {
|
||||
if (!aes_encrypt_cbc(sc->encKey, sc->iv, data, len, &apduData[AES_IV_SIZE])) {
|
||||
memset(data, 0, len);
|
||||
return ERR_CRYPTO;
|
||||
}
|
||||
|
@ -110,7 +109,7 @@ uint16_t SecureChannel_Protect_APDU(SecureChannel *sc, APDU* apdu, uint8_t* data
|
|||
return ERR_CRYPTO;
|
||||
}
|
||||
|
||||
aes_import_param(sc->iv, apduData, AES_IV_SIZE);
|
||||
memcpy(sc->iv, apduData, AES_IV_SIZE);
|
||||
return ERR_OK;
|
||||
}
|
||||
|
||||
|
@ -142,14 +141,14 @@ uint16_t SecureChannel_Decrypt_APDU(SecureChannel *sc, APDU* apdu) {
|
|||
return ERR_CRYPTO;
|
||||
}
|
||||
|
||||
if (!aes_decrypt(sc->encKey, sc->iv, &data[AES_IV_SIZE], (apdu->lr - AES_IV_SIZE), data)) {
|
||||
if (!aes_decrypt_cbc(sc->encKey, sc->iv, &data[AES_IV_SIZE], (apdu->lr - AES_IV_SIZE), data)) {
|
||||
sc->open = 0;
|
||||
return ERR_CRYPTO;
|
||||
}
|
||||
|
||||
apdu->lr = unpad_iso9797_m1(data, (apdu->lr - AES_IV_SIZE));
|
||||
|
||||
aes_import_param(sc->iv, new_iv, AES_IV_SIZE);
|
||||
memcpy(sc->iv, new_iv, AES_IV_SIZE);
|
||||
|
||||
return ERR_OK;
|
||||
}
|
||||
|
@ -161,7 +160,7 @@ uint16_t SecureChannel_Init(SmartCard* card, APDU* apdu, uint8_t* sc_pub, uint8_
|
|||
uint8_t secret[SECP256K1_PUBLEN+3] __attribute__((aligned(4)));
|
||||
|
||||
uint8_t res = ecdh_multiply(&secp256k1, priv, sc_pub, &secret[3]);
|
||||
aes_import_param(secret, &secret[4], AES_256_KEY_SIZE);
|
||||
memcpy(secret, &secret[4], AES_256_KEY_SIZE);
|
||||
|
||||
uint8_t* apduData = APDU_DATA(apdu);
|
||||
apduData[0] = SECP256K1_PUBLEN;
|
||||
|
@ -176,10 +175,9 @@ uint16_t SecureChannel_Init(SmartCard* card, APDU* apdu, uint8_t* sc_pub, uint8_
|
|||
uint8_t iv[AES_IV_SIZE] __attribute__((aligned(4)));
|
||||
random_buffer(iv, AES_IV_SIZE);
|
||||
memcpy(&apduData[SECP256K1_PUBLEN+1], iv, AES_IV_SIZE);
|
||||
aes_import_param(iv, iv, AES_IV_SIZE);
|
||||
|
||||
len = pad_iso9797_m1(data, SC_PAD, len);
|
||||
res = aes_encrypt(secret, iv, data, len, data);
|
||||
res = aes_encrypt_cbc(secret, iv, data, len, data);
|
||||
|
||||
memset(secret, 0, SECP256K1_KEYLEN+3);
|
||||
|
||||
|
@ -225,4 +223,4 @@ void SecureChannel_Close(SecureChannel* sc) {
|
|||
memset(sc->macKey, 0, AES_256_KEY_SIZE);
|
||||
memset(sc->iv, 0, AES_IV_SIZE);
|
||||
sc->open = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,6 +92,7 @@
|
|||
<option id="com.crt.advproject.gcc.hdrlib.336588597" name="Library headers" superClass="com.crt.advproject.gcc.hdrlib" useByScannerDiscovery="false"/>
|
||||
<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="gnu.c.compiler.option.preprocessor.def.symbols.2138057685" name="Defined symbols (-D)" superClass="gnu.c.compiler.option.preprocessor.def.symbols" useByScannerDiscovery="false" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="__REDLIB__"/>
|
||||
<listOptionValue builtIn="false" value="CM7"/>
|
||||
<listOptionValue builtIn="false" value="CSI_DRIVER_QUEUE_SIZE=2"/>
|
||||
<listOptionValue builtIn="false" value="CPU_MIMXRT1064DVL6A"/>
|
||||
<listOptionValue builtIn="false" value="CPU_MIMXRT1064DVL6A_cm7"/>
|
||||
|
@ -108,7 +109,6 @@
|
|||
<listOptionValue builtIn="false" value="__MCUXPRESSO"/>
|
||||
<listOptionValue builtIn="false" value="__USE_CMSIS"/>
|
||||
<listOptionValue builtIn="false" value="DEBUG"/>
|
||||
<listOptionValue builtIn="false" value="SDK_OS_BAREMETAL"/>
|
||||
</option>
|
||||
<option id="com.crt.advproject.gcc.fpu.1872989765" name="Floating point" superClass="com.crt.advproject.gcc.fpu" useByScannerDiscovery="true" value="com.crt.advproject.gcc.fpu.fpv5sp.hard" valueType="enumerated"/>
|
||||
<option id="com.crt.advproject.gcc.thumb.1957416151" name="Thumb mode" superClass="com.crt.advproject.gcc.thumb" useByScannerDiscovery="false" value="true" valueType="boolean"/>
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.xtext.ui.shared.xtextBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
|
@ -23,6 +28,7 @@
|
|||
<nature>com.nxp.mcuxpresso.core.datamodels.sdkNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
<nature>org.eclipse.xtext.ui.shared.xtextNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
<extension point="org.eclipse.cdt.core.LanguageSettingsProvider">
|
||||
<provider copy-of="extension" id="org.eclipse.cdt.ui.UserLanguageSettingsProvider"/>
|
||||
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuildCommandParser" id="com.crt.advproject.GCCBuildCommandParser" keep-relative-paths="false" name="MCU GCC Build Output Parser" parameter="(arm-none-eabi-gcc)|(arm-none-eabi-[gc]\+\+)|(gcc)|([gc]\+\+)|(clang)" prefer-non-shared="true"/>
|
||||
<provider class="com.crt.advproject.specs.MCUGCCBuiltinSpecsDetector" console="false" env-hash="360970577880734129" id="com.crt.advproject.GCCBuildSpecCompilerParser" keep-relative-paths="false" name="MCU GCC Built-in Compiler Parser" parameter="${COMMAND} ${FLAGS} -E -P -v -dD "${INPUTS}"" prefer-non-shared="true">
|
||||
<provider class="com.crt.advproject.specs.MCUGCCBuiltinSpecsDetector" console="false" env-hash="-1223034433120124527" id="com.crt.advproject.GCCBuildSpecCompilerParser" keep-relative-paths="false" name="MCU GCC Built-in Compiler Parser" parameter="${COMMAND} ${FLAGS} -E -P -v -dD "${INPUTS}"" prefer-non-shared="true">
|
||||
<language-scope id="org.eclipse.cdt.core.gcc"/>
|
||||
<language-scope id="org.eclipse.cdt.core.g++"/>
|
||||
</provider>
|
||||
|
@ -15,7 +15,7 @@
|
|||
<extension point="org.eclipse.cdt.core.LanguageSettingsProvider">
|
||||
<provider copy-of="extension" id="org.eclipse.cdt.ui.UserLanguageSettingsProvider"/>
|
||||
<provider copy-of="extension" id="com.crt.advproject.GCCBuildCommandParser"/>
|
||||
<provider class="com.crt.advproject.specs.MCUGCCBuiltinSpecsDetector" console="false" env-hash="424039675280054801" id="com.crt.advproject.GCCBuildSpecCompilerParser" keep-relative-paths="false" name="MCU GCC Built-in Compiler Parser" parameter="${COMMAND} ${FLAGS} -E -P -v -dD "${INPUTS}"" prefer-non-shared="true">
|
||||
<provider class="com.crt.advproject.specs.MCUGCCBuiltinSpecsDetector" console="false" env-hash="-1159965335720803855" id="com.crt.advproject.GCCBuildSpecCompilerParser" keep-relative-paths="false" name="MCU GCC Built-in Compiler Parser" parameter="${COMMAND} ${FLAGS} -E -P -v -dD "${INPUTS}"" prefer-non-shared="true">
|
||||
<language-scope id="org.eclipse.cdt.core.gcc"/>
|
||||
<language-scope id="org.eclipse.cdt.core.g++"/>
|
||||
</provider>
|
||||
|
|
14
nxp/nxp.mex
14
nxp/nxp.mex
|
@ -18,9 +18,9 @@
|
|||
<generate_registers_defines>false</generate_registers_defines>
|
||||
</preferences>
|
||||
<tools>
|
||||
<pins name="Pins" version="12.0" enabled="true" update_project_code="true">
|
||||
<pins name="Pins" version="13.0" enabled="true" update_project_code="true">
|
||||
<pins_profile>
|
||||
<processor_version>12.0.1</processor_version>
|
||||
<processor_version>13.0.1</processor_version>
|
||||
<pin_labels>
|
||||
<pin_label pin_num="E3" pin_signal="GPIO_EMC_00" label="SEMC_D0" identifier="SEMC_D0"/>
|
||||
<pin_label pin_num="F3" pin_signal="GPIO_EMC_01" label="SEMC_D1" identifier="SEMC_D1"/>
|
||||
|
@ -731,9 +731,9 @@
|
|||
</function>
|
||||
</functions_list>
|
||||
</pins>
|
||||
<clocks name="Clocks" version="10.0" enabled="true" update_project_code="true">
|
||||
<clocks name="Clocks" version="11.0" enabled="true" update_project_code="true">
|
||||
<clocks_profile>
|
||||
<processor_version>12.0.1</processor_version>
|
||||
<processor_version>13.0.1</processor_version>
|
||||
</clocks_profile>
|
||||
<clock_configurations>
|
||||
<clock_configuration name="BOARD_BootClockRUN" id_prefix="" prefix_user_defined="false">
|
||||
|
@ -886,7 +886,7 @@
|
|||
</clocks>
|
||||
<dcdx name="DCDx" version="3.0" enabled="true" update_project_code="true">
|
||||
<dcdx_profile>
|
||||
<processor_version>12.0.1</processor_version>
|
||||
<processor_version>13.0.1</processor_version>
|
||||
<output_format>c_array</output_format>
|
||||
</dcdx_profile>
|
||||
<dcdx_configurations>
|
||||
|
@ -1027,9 +1027,9 @@
|
|||
</dcdx_configuration>
|
||||
</dcdx_configurations>
|
||||
</dcdx>
|
||||
<periphs name="Peripherals" version="11.0" enabled="true" update_project_code="true">
|
||||
<periphs name="Peripherals" version="12.0" enabled="true" update_project_code="true">
|
||||
<peripherals_profile>
|
||||
<processor_version>12.0.1</processor_version>
|
||||
<processor_version>13.0.1</processor_version>
|
||||
</peripherals_profile>
|
||||
<functional_groups>
|
||||
<functional_group name="BOARD_InitPeripherals" uuid="1c6563a6-c68b-40e5-8828-2853c99f95fa" called_from_default_init="true" id_prefix="BOARD_" core="core0">
|
||||
|
|
Loading…
Reference in New Issue