add SHA-256 implementation

This commit is contained in:
Michele Balistreri 2018-05-14 12:04:55 +03:00
parent c67e77191c
commit 748d5953fa
9 changed files with 694 additions and 1 deletions

61
Inc/bitops.h Normal file
View File

@ -0,0 +1,61 @@
/*
* cifra - embedded cryptography library
* Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef BITOPS_H
#define BITOPS_H
#include <stdint.h>
#include <stddef.h>
/* Assorted bitwise and common operations used in ciphers. */
/** Circularly rotate right x by n bits.
 *
 * Any n is accepted: the rotation count is reduced modulo 32, so n == 0
 * (or any multiple of 32) returns x unchanged.  Both shift counts are
 * masked into 0..31 because shifting a 32-bit value by 32 or more is
 * undefined behaviour in C. */
static inline uint32_t rotr32(uint32_t x, unsigned n)
{
  n &= 31u;
  return (x >> n) | (x << ((32u - n) & 31u));
}
/** Read 4 bytes from buf, as a 32-bit big endian quantity.
 *
 * buf[0] is the most significant byte.  Each byte is widened to uint32_t
 * before shifting: a bare uint8_t would be promoted to (signed) int, and
 * shifting a byte >= 0x80 left by 24 would overflow that int. */
static inline uint32_t read32_be(const uint8_t buf[4])
{
  return ((uint32_t) buf[0] << 24) |
         ((uint32_t) buf[1] << 16) |
         ((uint32_t) buf[2] << 8) |
         ((uint32_t) buf[3]);
}
/** Store v into buf[0..3] in big endian order (most significant byte
 * first). */
static inline void write32_be(uint32_t v, uint8_t buf[4])
{
  buf[0] = (uint8_t) (v >> 24);
  buf[1] = (uint8_t) (v >> 16);
  buf[2] = (uint8_t) (v >> 8);
  buf[3] = (uint8_t) v;
}
/** Store v into buf[0..7] in big endian order (most significant byte
 * first). */
static inline void write64_be(uint64_t v, uint8_t buf[8])
{
  /* Byte i receives bits [63 - 8i .. 56 - 8i] of v. */
  for (size_t i = 0; i < 8; i++)
  {
    buf[i] = (uint8_t) (v >> (56 - 8 * i));
  }
}
#endif

124
Inc/blockwise.h Normal file
View File

@ -0,0 +1,124 @@
/*
* cifra - embedded cryptography library
* Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef BLOCKWISE_H
#define BLOCKWISE_H
#include <stdint.h>
#include <stddef.h>
/* Processing function for cf_blockwise_accumulate and the other
 * accumulators declared below: called with the caller-supplied ctx and a
 * read-only pointer to the block of data to process. */
typedef void (*cf_blockwise_in_fn)(void *ctx, const uint8_t *data);
/* Processing function for cf_blockwise_xor: called with ctx and a writable
 * pointer to a block of data.
 * NOTE(review): cf_blockwise_xor is not declared in this header -- confirm
 * whether this typedef is still needed here. */
typedef void (*cf_blockwise_out_fn)(void *ctx, uint8_t *data);
/* This function manages the common abstraction of accumulating input in
 * a buffer, and processing it when a full block is available.
 *
 * partial is the buffer (maintained by the caller)
 * on entry, npartial is the currently valid count of used bytes on
 *   the front of partial.
 * on exit, npartial is updated to reflect the status of partial.
 * nblock is the blocksize to accumulate -- partial must be at least
 *   this long!
 * input is the new data to process, of length nbytes.
 * process is the processing function, passed ctx and a pointer
 *   to the data to process (always exactly nblock bytes long!)
 *   which may not necessarily be the same as partial: whole blocks
 *   available in input are passed in place, without being copied.
 * ctx is handed to process unchanged.
 */
void cf_blockwise_accumulate(uint8_t *partial, size_t *npartial,
size_t nblock,
const void *input, size_t nbytes,
cf_blockwise_in_fn process,
void *ctx);
/* This function manages the common abstraction of accumulating input in
 * a buffer, and processing it when a full block is available.
 * This version supports calling a different processing function for
 * the last block.
 *
 * partial is the buffer (maintained by the caller)
 * on entry, npartial is the currently valid count of used bytes on
 *   the front of partial.
 * on exit, npartial is updated to reflect the status of partial.
 * nblock is the blocksize to accumulate -- partial must be at least
 *   this long!
 * input is the new data to process, of length nbytes.
 * process is the processing function, passed ctx and a pointer
 *   to the data to process (always exactly nblock bytes long!)
 *   which may not necessarily be the same as partial.
 * process_final is called instead of process for a full block that
 *   uses up the last of input (so it may not be called at all if the
 *   tail of the input is only buffered).
 * ctx is handed to process and process_final unchanged.
 */
void cf_blockwise_accumulate_final(uint8_t *partial, size_t *npartial,
size_t nblock,
const void *input, size_t nbytes,
cf_blockwise_in_fn process,
cf_blockwise_in_fn process_final,
void *ctx);
/* This function processes a single byte a number of times. It's useful
 * for padding, and more efficient than calling cf_blockwise_accumulate
 * a bunch of times.
 *
 * partial is the buffer (maintained by the caller)
 * on entry, npartial is the currently valid count of used bytes on
 *   the front of partial.
 * on exit, npartial is updated to reflect the status of partial.
 * nblock is the blocksize to accumulate -- partial must be at least
 *   this long!
 * process is the processing function, passed ctx and a pointer
 *   to the data to process (always exactly nblock bytes long!).
 *   In this function the pointer passed is always partial.
 * byte is the byte to process, nbytes times.
 * ctx is handed to process unchanged.
 */
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
size_t nblock,
uint8_t byte, size_t nbytes,
cf_blockwise_in_fn process,
void *ctx);
/* This function attempts to process patterns of bytes common in
 * block cipher padding.
 *
 * This takes three bytes:
 * - a first byte, fbyte,
 * - a middle byte, mbyte,
 * - a last byte, lbyte.
 *
 * If nbytes is zero, nothing happens.
 * If nbytes is one, the byte fbyte ^ lbyte is processed.
 * If nbytes is two, fbyte then lbyte are processed.
 * If nbytes is three or more, fbyte, then one or more mbytes, then lbyte
 * is processed (nbytes bytes in total).
 *
 * partial is the buffer (maintained by the caller)
 * on entry, npartial is the currently valid count of used bytes on
 *   the front of partial.
 * on exit, npartial is updated to reflect the status of partial.
 * nblock is the blocksize to accumulate -- partial must be at least
 *   this long!
 * process is the processing function, passed ctx and a pointer
 *   to the data to process (always exactly nblock bytes long!)
 *   which may not necessarily be the same as partial.
 * ctx is handed to process unchanged.
 */
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
size_t nblock,
uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
size_t nbytes,
cf_blockwise_in_fn process,
void *ctx);
#endif

70
Inc/sha256.h Normal file
View File

@ -0,0 +1,70 @@
/*
* cifra - embedded cryptography library
* Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef SHA2_H
#define SHA2_H
#include <stddef.h>
#include <stdint.h>
/* .. c:macro:: CF_SHA256_HASHSZ
* The output size of SHA256: 32 bytes. */
#define CF_SHA256_HASHSZ 32
/* .. c:macro:: CF_SHA256_BLOCKSZ
* The block size of SHA256: 64 bytes. */
#define CF_SHA256_BLOCKSZ 64
/* .. c:type:: cf_sha256_context
 * Incremental SHA256 hashing context.
 *
 * .. c:member:: cf_sha256_context.H
 * Intermediate values: the eight 32-bit chaining words, updated once
 * per processed block and written out big endian as the final hash.
 *
 * .. c:member:: cf_sha256_context.partial
 * Unprocessed input (less than one full block).
 *
 * .. c:member:: cf_sha256_context.npartial
 * Number of bytes of unprocessed input.
 *
 * .. c:member:: cf_sha256_context.blocks
 * Number of full blocks processed. This is a 32-bit counter; the
 * message length hashed by cf_sha256_digest is derived from it.
 */
typedef struct
{
uint32_t H[8]; /* State: chaining words, seeded by cf_sha256_init. */
uint8_t partial[CF_SHA256_BLOCKSZ]; /* Partial block of input awaiting more data. */
uint32_t blocks; /* Number of full blocks processed into H. */
size_t npartial; /* Number of valid bytes at the front of partial. */
} cf_sha256_context;
/* .. c:function:: $DECL
 * Sets up `ctx` ready to hash a new message: the whole context is
 * zeroed and `H` is loaded with the initial hash words.
 */
extern void cf_sha256_init(cf_sha256_context *ctx);
/* .. c:function:: $DECL
 * Hashes `nbytes` at `data`. Copies the data if there isn't enough to make
 * a full block; the copy is kept in `ctx` until later input completes the
 * block. May be called any number of times between init and digest.
 */
extern void cf_sha256_update(cf_sha256_context *ctx, const void *data, size_t nbytes);
/* .. c:function:: $DECL
 * Finishes the hash operation, writing `CF_SHA256_HASHSZ` bytes to `hash`.
 * Padding and the 64-bit message bit length are hashed first.
 *
 * This destroys `ctx`: it is zeroed before returning, so call
 * `cf_sha256_init` again before reusing it.
 */
extern void cf_sha256_digest(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ]);
#endif

View File

@ -3,6 +3,8 @@
#include "stm32l4xx.h"
#include <string.h>
// MCU specific constants
#define FLASH_BANK_SIZE 0x80000
#define FLASH_PAGE_SIZE 0x800
@ -14,4 +16,24 @@
/* Reinterpret the address expression n as a pointer to 32-bit words or to
 * bytes.  The argument is parenthesised so the cast applies to the whole
 * expression: UINT32_PTR(base + 4) is the word at base + 4, not four words
 * past base. */
#define UINT32_PTR(n) ((uint32_t *) (n))
#define UINT8_PTR(n) ((uint8_t *) (n))
/* MIN(x, y): the smaller of x and y.  Each argument is evaluated exactly
 * once, so expressions with side effects are safe.  Uses the GNU statement
 * expression extension; __typeof__ is spelled with underscores so the macro
 * also compiles in strict ISO modes (-std=c99/-std=c11), and the temporaries
 * avoid the reserved "__" prefix. */
#ifndef MIN
#define MIN(x, y) \
  ({ __typeof__ (x) min_lhs_ = (x); \
     __typeof__ (y) min_rhs_ = (y); \
     min_lhs_ < min_rhs_ ? min_lhs_ : min_rhs_; })
#endif
/* MAX(x, y): the larger of x and y.  Each argument is evaluated exactly
 * once, so expressions with side effects are safe.  Uses the GNU statement
 * expression extension; __typeof__ is spelled with underscores so the macro
 * also compiles in strict ISO modes (-std=c99/-std=c11), and the temporaries
 * avoid the reserved "__" prefix. */
#ifndef MAX
#define MAX(x, y) \
  ({ __typeof__ (x) max_lhs_ = (x); \
     __typeof__ (y) max_rhs_ = (y); \
     max_lhs_ > max_rhs_ ? max_lhs_ : max_rhs_; })
#endif
/* Overwrite len bytes at v with zeros in a way the compiler must not
 * remove.
 *
 * Every byte is stored through a volatile-qualified pointer, so each store
 * is an observable access.  (Casting the volatile away and calling memset
 * lets the optimiser treat the stores as dead when the buffer is not read
 * afterwards.)  A len of zero does nothing. */
static inline void memzero(volatile void *v, uint32_t len) {
  volatile uint8_t *cursor = (volatile uint8_t *) v;

  while (len--) {
    *cursor++ = 0;
  }
}
#endif /* SYS_H_ */

154
Src/blockwise.c Normal file
View File

@ -0,0 +1,154 @@
/*
* cifra - embedded cryptography library
* Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include "blockwise.h"
#include "bitops.h"
#include "sys.h"
#include <string.h>
/* Accumulate nbytes of inp into partial, calling process on each completed
 * block.  Implemented by forwarding to cf_blockwise_accumulate_final with
 * process used as both the ordinary and the final-block callback, so every
 * block is treated the same way. */
void cf_blockwise_accumulate(uint8_t *partial, size_t *npartial, size_t nblock,
const void *inp, size_t nbytes,
cf_blockwise_in_fn process,
void *ctx)
{
cf_blockwise_accumulate_final(partial, npartial, nblock,
inp, nbytes,
process, process, ctx);
}
/* Feed nbytes of inp through the block buffer.
 *
 * Works in three stages: top up an already part-filled partial buffer,
 * process whole blocks directly from the input, then stash whatever is
 * left in partial.  A full block that uses up the last input byte is given
 * to process_final; every other full block goes to process. */
void cf_blockwise_accumulate_final(uint8_t *partial, size_t *npartial, size_t nblock,
                                   const void *inp, size_t nbytes,
                                   cf_blockwise_in_fn process,
                                   cf_blockwise_in_fn process_final,
                                   void *ctx)
{
  const uint8_t *src = inp;
  size_t remaining = nbytes;

  /* Stage 1: there is buffered data, so new input must be appended to it. */
  if (*npartial != 0 && remaining != 0)
  {
    size_t room = nblock - *npartial;
    size_t chunk = MIN(room, remaining);

    memcpy(&partial[*npartial], src, chunk);
    src += chunk;
    remaining -= chunk;
    *npartial += chunk;

    /* The buffer just became a complete block: hand it over and reset. */
    if (*npartial == nblock)
    {
      cf_blockwise_in_fn handler = (remaining == 0) ? process_final : process;
      handler(ctx, partial);
      *npartial = 0;
    }
  }

  /* Here either remaining < nblock or the buffer is empty. */

  /* Stage 2: whole blocks are processed in place, straight from the input. */
  for (; remaining >= nblock; src += nblock, remaining -= nblock)
  {
    cf_blockwise_in_fn handler = (remaining == nblock) ? process_final : process;
    handler(ctx, src);
  }

  /* Stage 3: keep the tail for the next call. */
  while (remaining != 0)
  {
    size_t room = nblock - *npartial;
    size_t chunk = MIN(room, remaining);

    memcpy(&partial[*npartial], src, chunk);
    src += chunk;
    remaining -= chunk;
    *npartial += chunk;
  }
}
/* Process the value byte repeated nbytes times through the block buffer.
 *
 * Each pass fills partial from its current fill level up to the block
 * boundary (or as far as the remaining count reaches) and calls process
 * whenever the block becomes full. */
void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
                           size_t nblock,
                           uint8_t byte, size_t nbytes,
                           cf_blockwise_in_fn process,
                           void *ctx)
{
  /* Set once partial has been filled with byte from end to end; further
   * memsets are then skipped. */
  int whole_block_set = 0;
  size_t remaining = nbytes;

  while (remaining != 0)
  {
    size_t offset = *npartial;
    size_t room = nblock - offset;
    size_t run = MIN(remaining, room);

    if (!whole_block_set)
    {
      memset(partial + offset, byte, run);
    }

    if (offset == 0 && run == nblock)
    {
      whole_block_set = 1;
    }

    if (run == room)
    {
      /* The block is complete. */
      process(ctx, partial);
      *npartial = 0;
    }
    else
    {
      *npartial += run;
    }

    remaining -= run;
  }
}
/* Process an nbytes-long padding pattern: fbyte, then mbyte repeated, then
 * lbyte.
 *
 *   nbytes == 0: nothing is processed.
 *   nbytes == 1: the single byte fbyte ^ lbyte.
 *   nbytes == 2: fbyte, lbyte.
 *   nbytes >= 3: fbyte, (nbytes - 2) x mbyte, lbyte.
 */
void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
                          size_t nblock,
                          uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
                          size_t nbytes,
                          cf_blockwise_in_fn process,
                          void *ctx)
{
  if (nbytes == 0)
  {
    return;
  }

  if (nbytes == 1)
  {
    /* First and last byte share the only slot. */
    uint8_t merged = fbyte ^ lbyte;
    cf_blockwise_accumulate(partial, npartial, nblock, &merged, 1, process, ctx);
    return;
  }

  /* Two or more bytes always start with fbyte. */
  cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);

  if (nbytes == 2)
  {
    cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
    return;
  }

  if (lbyte == mbyte)
  {
    /* The last byte equals the middle byte, so one longer run covers both. */
    cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 1, process, ctx);
  }
  else
  {
    cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 2, process, ctx);
    cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
  }
}

View File

@ -1,5 +1,6 @@
#include "main.h"
#include "flash.h"
#include "sha256.h"
int main(void) {
if (!check_firmware(UPGRADE_FW_START)) {
@ -12,10 +13,15 @@ int main(void) {
}
int check_firmware(uintptr_t addr) {
if(*UINT32_PTR(addr) != FW_MAGIC) {
if(UINT32_PTR(addr)[0] != FW_MAGIC) {
return 1;
}
cf_sha256_context ctx;
cf_sha256_init(&ctx);
cf_sha256_update(&ctx, UINT8_PTR(addr), UINT32_PTR(addr)[1]);
uint8_t hash[CF_SHA256_HASHSZ];
cf_sha256_digest(&ctx, hash);
//TODO: verify signature!!!
return 0;

49
Src/memcpy.s Normal file
View File

@ -0,0 +1,49 @@
/* memcpy: copy r2 bytes from r1 to r0 and return the original r0.
 * Copies a word at a time when both pointers are 4-byte aligned, and a
 * byte at a time otherwise (and for the final 0-3 bytes). */
.text
.syntax unified
.global memcpy
.func memcpy
.thumb_func
memcpy:
/* on entry
 * r0 = targ
 * r1 = src
 * r2 = len (bytes)
 * on exit
 * r0 = targ (unchanged)
 */
/* r0 is pushed so that the final pop restores the original targ as the
 * return value; r4 is used as scratch below. */
push {r0, r4, lr}
/* If targ or src are unaligned, drop to byte
 * processing. */
/* r3 = (targ | src) & 3: non-zero when either pointer has one of its two
 * low address bits set. */
mov r3, r0
movs r4, #3
orrs r3, r1
ands r3, r4
bne L_bytewise
/* Process words */
L_wordwise:
/* Fewer than 4 bytes left: finish in the byte loop. */
cmp r2, #4
blo L_bytewise
/* Copy one word through r4, then advance src and targ by 4 and reduce
 * len by 4. */
ldr r4, [r1]
adds r1, #4
str r4, [r0]
adds r0, #4
subs r2, #4
b L_wordwise
/* Process bytes */
L_bytewise:
/* Done when len reaches zero. */
cmp r2, #0
beq L_fin
/* Copy one byte through r4, then advance src and targ by 1 and reduce
 * len by 1. */
ldrb r4, [r1]
adds r1, #1
strb r4, [r0]
adds r0, #1
subs r2, #1
b L_bytewise
L_fin:
/* Restore the saved targ into r0 and r4, and return by popping the saved
 * lr into pc. */
pop {r0, r4, pc}
.endfunc

50
Src/memset.s Normal file
View File

@ -0,0 +1,50 @@
/* memset: store the low byte of r1 into r2 bytes starting at r0 and return
 * the original r0.  Stores a word at a time when targ is 4-byte aligned,
 * and a byte at a time otherwise (and for the final 0-3 bytes). */
.text
.syntax unified
.global memset
.func memset
.thumb_func
memset:
/* on entry
 * r0 = targ
 * r1 = value
 * r2 = len (bytes)
 * on exit
 * r0 = targ (unchanged)
 */
/* r0 is pushed so that the final pop restores the original targ as the
 * return value; r4 is used as scratch below. */
push {r0, r4, lr}
/* If targ is unaligned, drop to byte
 * processing. */
/* r3 = targ & 3: non-zero when one of the two low address bits is set. */
movs r3, #3
ands r3, r0
bne L_bytewise
/* Process words */
/* Build r4 by repeating r1. */
/* uxtb keeps only the low 8 bits of value; the two shift-and-OR steps then
 * replicate that byte into 16 bits and finally into all 32 bits of r4. */
uxtb r4, r1
lsls r3, r4, #8
orrs r4, r3
lsls r3, r4, #16
orrs r4, r3
L_wordwise:
/* Fewer than 4 bytes left: finish in the byte loop. */
cmp r2, #4
blo L_bytewise
/* Store one fill word, advance targ by 4 and reduce len by 4. */
str r4, [r0]
adds r0, #4
subs r2, #4
b L_wordwise
/* Process bytes */
L_bytewise:
/* Done when len reaches zero. */
cmp r2, #0
beq L_fin
/* strb writes the low byte of r1; advance targ by 1 and reduce len by 1. */
strb r1, [r0]
adds r0, #1
subs r2, #1
b L_bytewise
L_fin:
/* Restore the saved targ into r0 and r4, and return by popping the saved
 * lr into pc. */
pop {r0, r4, pc}
.endfunc

157
Src/sha256.c Normal file
View File

@ -0,0 +1,157 @@
/*
* cifra - embedded cryptography library
* Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all
* copyright and related and neighboring rights to this software to the
* public domain worldwide. This software is distributed without any
* warranty.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include "sha256.h"
#include <string.h>
#include "blockwise.h"
#include "bitops.h"
/* SHA-256 round constants: K[t] is added into T1 in round t of
 * sha256_update_block (64 rounds per block).  The values are those listed
 * for SHA-256 in FIPS 180-4. */
static const uint32_t K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/* Bitwise helper functions used by the SHA-256 compression round.
 *
 * CH:  for each bit, pick y where x is 1 and z where x is 0.
 * MAJ: for each bit, the value held by at least two of x, y and z.
 * BSIG0/BSIG1: XOR of three right-rotations of x.
 * SSIG0/SSIG1: XOR of two right-rotations and one right-shift of x.
 *
 * Arguments are evaluated more than once, so pass side-effect-free
 * expressions only. */
#define CH(x, y, z)  ((z) ^ ((x) & ((y) ^ (z))))
#define MAJ(x, y, z) (((x) & (y)) | ((z) & ((x) | (y))))
#define BSIG0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))
#define BSIG1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))
#define SSIG0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))
#define SSIG1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))
/* Prepare ctx for a new message: clear the whole context (buffer, counters
 * and state), then load the eight initial hash words into H. */
void cf_sha256_init(cf_sha256_context *ctx)
{
  static const uint32_t initial_h[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  };

  memset(ctx, 0, sizeof *ctx);

  for (size_t i = 0; i < 8; i++)
  {
    ctx->H[i] = initial_h[i];
  }
}
/* Compress one 64-byte block at inp into the context passed as vctx.
 *
 * Runs the 64 rounds on a working copy of H, adds the result back into H
 * and increments the processed-block counter.  The signature matches
 * cf_blockwise_in_fn so this can be used as a blockwise callback. */
static void sha256_update_block(void *vctx, const uint8_t *inp)
{
  cf_sha256_context *ctx = vctx;

  /* Only the most recent 16 message-schedule words are ever needed, so
   * they live in a ring indexed by (round mod 16). */
  uint32_t window[16];

  /* Working variables a..h, held as s[0]..s[7]. */
  uint32_t s[8];

  for (size_t i = 0; i < 8; i++)
  {
    s[i] = ctx->H[i];
  }

  for (size_t round = 0; round < 64; round++)
  {
    uint32_t w;

    if (round < 16)
    {
      /* The first 16 schedule words are the block itself, read as
       * big endian 32-bit words. */
      w = read32_be(inp + 4 * round);
    }
    else
    {
      /* W[t] = SSIG1(W[t-2]) + W[t-7] + SSIG0(W[t-15]) + W[t-16],
       * with every index reduced into the 16-word ring. */
      w = SSIG1(window[(round - 2) & 15]) +
          window[(round - 7) & 15] +
          SSIG0(window[(round - 15) & 15]) +
          window[(round - 16) & 15];
    }
    window[round & 15] = w;

    uint32_t t1 = s[7] + BSIG1(s[4]) + CH(s[4], s[5], s[6]) + K[round] + w;
    uint32_t t2 = BSIG0(s[0]) + MAJ(s[0], s[1], s[2]);

    /* Rotate the working variables down by one position. */
    s[7] = s[6];
    s[6] = s[5];
    s[5] = s[4];
    s[4] = s[3] + t1;
    s[3] = s[2];
    s[2] = s[1];
    s[1] = s[0];
    s[0] = t1 + t2;
  }

  /* Fold the working variables back into the chaining state. */
  for (size_t i = 0; i < 8; i++)
  {
    ctx->H[i] += s[i];
  }

  ctx->blocks++;
}
/* Hash nbytes at data into ctx.  Delegates buffering to
 * cf_blockwise_accumulate using ctx->partial as the block buffer (its size
 * is the block size) and sha256_update_block as the per-block callback. */
void cf_sha256_update(cf_sha256_context *ctx, const void *data, size_t nbytes)
{
cf_blockwise_accumulate(ctx->partial, &ctx->npartial, sizeof ctx->partial,
data, nbytes,
sha256_update_block, ctx);
}
/* Finish the hash: pad the message, append its length in bits, write the
 * CF_SHA256_HASHSZ-byte result to hash and wipe ctx.
 *
 * The context is zeroed on return and must be re-initialised before it is
 * used again. */
void cf_sha256_digest(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ])
{
  /* Message length so far: whole blocks plus the buffered tail. */
  uint64_t total_bytes = (uint64_t) ctx->blocks * CF_SHA256_BLOCKSZ + ctx->npartial;
  uint64_t total_bits = total_bytes * 8;

  /* Pad so that, once the 8 length bytes follow, the total is a whole
   * number of blocks.  This is always between 1 and CF_SHA256_BLOCKSZ. */
  size_t npad = CF_SHA256_BLOCKSZ - ((total_bytes + 8) % CF_SHA256_BLOCKSZ);

  /* Padding is a single 0x80 byte followed by zeros. */
  cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
                       0x80, 0x00, 0x00, npad,
                       sha256_update_block, ctx);

  /* The 64-bit big endian bit count completes the last block. */
  uint8_t lenbuf[8];
  write64_be(total_bits, lenbuf);
  cf_sha256_update(ctx, lenbuf, sizeof lenbuf);

  /* Serialise the eight state words, big endian, into the output. */
  for (size_t i = 0; i < 8; i++)
  {
    write32_be(ctx->H[i], hash + 4 * i);
  }

  memset(ctx, 0, sizeof *ctx);
}