blake initial

This commit is contained in:
Nicholas Ward 2022-11-03 16:01:08 -07:00
parent efa80edaa9
commit 1089bbf29e
19 changed files with 2135 additions and 0 deletions

View File

@ -0,0 +1,2 @@
// Entry label for what is presumably the BLAKE compression routine.
// NOTE(review): in this view only the label exists -- no instructions follow it and the
// expected stack layout has not been written down yet.
global blake_compression:
// stack: (layout not yet specified)

View File

@ -0,0 +1,34 @@
// Initialization vector table: eight words IV_0..IV_7, each emitted as
// eight consecutive bytes with the most significant byte first (big-endian).
global blake_iv:
    BYTES 106, 9, 230, 103, 243, 188, 201, 8       // IV_0
    BYTES 187, 103, 174, 133, 132, 202, 167, 59    // IV_1
    BYTES 60, 110, 243, 114, 254, 148, 248, 43     // IV_2
    BYTES 165, 79, 245, 58, 95, 29, 54, 241        // IV_3
    BYTES 81, 14, 82, 127, 173, 230, 130, 209      // IV_4
    BYTES 155, 5, 104, 140, 43, 62, 108, 31        // IV_5
    BYTES 31, 131, 217, 171, 251, 65, 189, 107     // IV_6
    BYTES 91, 224, 205, 25, 19, 126, 33, 121       // IV_7

View File

@ -0,0 +1,3 @@
// Entry label for what is presumably the BLAKE "G" mixing step.
// NOTE(review): only the label and the intended input stack are present in this view;
// the body has not been written yet.
global blake_g_function:
// stack: i, a, b, c, d

View File

@ -0,0 +1,172 @@
// Message-word permutations SIGMA[0..9], one 16-byte table per permutation.
// The tables are laid out back to back, so entry i of permutation p lives at
//     permutation_0_constants + 16*p + i.
// SIGMA[0] is the identity permutation; it must be present (and must come
// first) so that `round % 10` can be used directly as the table index.
permutation_0_constants:
    BYTES 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
permutation_1_constants:
    BYTES 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3
permutation_2_constants:
    BYTES 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4
permutation_3_constants:
    BYTES 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8
permutation_4_constants:
    BYTES 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13
permutation_5_constants:
    BYTES 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9
permutation_6_constants:
    BYTES 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11
permutation_7_constants:
    BYTES 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10
permutation_8_constants:
    BYTES 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5
permutation_9_constants:
    BYTES 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0

// Pushes SIGMA[round % 10][i], i.e. the index of the message word used at
// position i of the given round.
//
// Parameters (macro arguments, not stack items):
//   round - round number; reduced modulo 10 to select the permutation
//   i     - position within the permutation, expected in 0..15
//
// The lookup is based on permutation_0_constants. Basing it on
// permutation_1_constants (as before) shifted every round by one permutation
// and read 16 bytes past the last table whenever round % 10 == 9.
%macro blake_permutation(round, i)
    // stack: (empty)
    PUSH permutation_0_constants
    PUSH $round
    // stack: round, permutation_0_constants
    %mod_const(10)
    %mul_const(16)
    // stack: 16 * (round % 10), permutation_0_constants
    ADD
    %add_const($i)
    // stack: permutation_0_constants + 16 * (round % 10) + i
    %mload_kernel_code
    // stack: SIGMA[round % 10][i]
%endmacro

View File

@ -0,0 +1,89 @@
// Entry point: copies an input that was passed on the stack into memory (via
// %mstore_kernel_general) and then continues with sha2_pad.
// Resulting memory layout: address 0 holds num_bytes, address 1 + k holds x[k].
global sha2:
%jump(sha2_store)
global sha2_store:
// stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
DUP1
// stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
PUSH 0
// stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
%mstore_kernel_general
// stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
PUSH 1
// stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest
// Loop invariant: addr is the next address to write, counter is the number of
// input values still sitting on the stack.
store_loop:
// stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
DUP2
// stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
ISZERO
%jumpi(store_end)
// stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
// Bring (addr, val) to the top for the store while keeping counter and addr underneath.
%stack (addr, counter, val) -> (addr, val, counter, addr)
// stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest
%mstore_kernel_general
// stack: counter, addr, ... , x[num_bytes-1], retdest
%decrement
// stack: counter-1, addr, ... , x[num_bytes-1], retdest
SWAP1
// stack: addr, counter-1, ... , x[num_bytes-1], retdest
%increment
// stack: addr+1, counter-1, ... , x[num_bytes-1], retdest
%jump(store_loop)
store_end:
// All input values have been consumed; drop the loop variables.
// stack: addr, counter, retdest
%pop2
// stack: retdest
%jump(sha2_pad)
// Pads the stored message and then continues with message-schedule generation.
// Precondition: input is in memory, starting at 0 of kernel general segment, of the form
// num_bytes, x[0], x[1], ..., x[num_bytes - 1]
// Postcondition: output is in memory, starting at 0, of the form
// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63]
global sha2_pad:
// stack: retdest
PUSH 0
%mload_kernel_general
// stack: num_bytes, retdest
// STEP 1: append 1
// insert 128 (= 1 << 7) at address num_bytes+1, i.e. the slot right after the last
// input byte (x[k] is stored at address k+1 because address 0 holds num_bytes)
// stack: num_bytes, retdest
PUSH 1
PUSH 7
SHL
// stack: 128, num_bytes, retdest
DUP2
// stack: num_bytes, 128, num_bytes, retdest
%increment
// stack: num_bytes+1, 128, num_bytes, retdest
%mstore_kernel_general
// stack: num_bytes, retdest
// STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1
DUP1
// stack: num_bytes, num_bytes, retdest
%add_const(8)
%div_const(64)
%increment
// stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest
// STEP 3: calculate length := num_bytes*8
SWAP1
// stack: num_bytes, num_blocks, retdest
PUSH 8
MUL
// stack: length = num_bytes*8, num_blocks, retdest
// STEP 4: write length to x[num_blocks*64-7..num_blocks*64]
DUP2
// stack: num_blocks, length, num_blocks, retdest
PUSH 64
MUL
// stack: last_addr = num_blocks*64, length, num_blocks, retdest
%sha2_write_length
// stack: num_blocks, retdest
DUP1
// stack: num_blocks, num_blocks, retdest
// STEP 5: write num_blocks to x[0] (this overwrites the num_bytes header at address 0)
PUSH 0
%mstore_kernel_general
// stack: num_blocks, retdest
%message_schedule_addr_from_num_blocks
%jump(sha2_gen_all_message_schedules)

View File

@ -0,0 +1,96 @@
/// One step ("box") of a round: updates a and c, rotates the five state words by one
/// position, decrements boxes and hands control back to `round`.
///
/// Note that we unpack STATE: 5 to a, b, c, d, e
/// All additions are u32
///
/// def box(a, b, c, d, e, F, K):
///
/// box = get_box(sides, rounds, boxes)
/// a += F(b, c, d)
/// r = load(r)(box)
/// x = load_offset(r)
/// a += x + K
/// s = load(s)(box)
/// a = rol(s, a)
/// a += e
/// c = rol(10, c)
///
/// return e, a, b, c, d, F, K
global box:
// stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt
// F holds a code address; it is called by pushing the return label and the
// three arguments and then jumping to it.
PUSH pre_rol
DUP5
DUP5
DUP5
DUP10
// stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides, virt
JUMP
pre_rol:
// stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides, virt
ADD
// stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt
%get_box
// stack: box, a, b, c, d, e, F, K, boxes, rounds, sides, virt
DUP12
DUP2
// r_data[box] is the byte offset of the message word to use; add it to virt.
%mload_kernel_code(r_data)
ADD
// stack: virt + r, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt
%mload_kernel_general_u32_LE
// stack: x, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt
SWAP1
SWAP2
// stack: a, x, box, b, c, d, e, F, K, boxes, rounds, sides, virt
ADD
DUP8
ADD
// The sum a + F(b, c, d) + x + K is only reduced to 32 bits here, once.
%as_u32
// stack: a, box, b, c, d, e, F, K, boxes, rounds, sides, virt
PUSH mid_rol
SWAP2
// stack: box, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt
%mload_kernel_code(s_data)
// stack: s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt
%jump(rol)
mid_rol:
// stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt
DUP5
// stack: e, a, b, c, d, e, F, K, boxes, rounds, sides, virt
ADD
%as_u32
// stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt
%stack (a, b, c) -> (10, c, post_rol, a, b)
// stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides, virt
%jump(rol)
post_rol:
// stack: c, a, b, d, e, F, K, boxes , rounds, sides, virt
// Reorder to (e, a, b, c, d) and decrement boxes in the same shuffle.
%stack (c, a, b, d, e, F, K, boxes) -> (boxes, 1, a, b, c, d, F, K, e)
// stack: boxes, 1, a, b, c, d, F, K, e, rounds, sides, virt
SUB
SWAP7
// stack: e, a, b, c, d, F, K, boxes-1, rounds, sides, virt
%jump(round)
// Maps the (sides, rounds) counters to a single round index:
//     rnd = 10 - 5*sides - rounds
%macro get_round
    // stack: sides, rounds
    %mul_const(5)
    // stack: 5*sides, rounds
    ADD
    // stack: 5*sides + rounds
    PUSH 10
    SUB
    // stack: 10 - (5*sides + rounds)
%endmacro
// Pushes the flat box index derived from the three countdown counters,
//     box = 176 - boxes - 16*rounds - 80*sides,
// leaving everything that was already on the stack untouched below it.
%macro get_box
    // stack: ARGS: 7, boxes, rounds, sides
    DUP10
    %mul_const(80)
    // stack: 80*sides, ARGS: 7, boxes, rounds, sides
    DUP10
    %mul_const(16)
    // stack: 16*rounds, 80*sides, ARGS: 7, boxes, rounds, sides
    ADD
    // stack: 16*rounds + 80*sides, ARGS: 7, boxes, rounds, sides
    DUP9
    ADD
    // stack: boxes + 16*rounds + 80*sides, ARGS: 7, boxes, rounds, sides
    PUSH 176
    SUB
    // stack: 176 - (boxes + 16*rounds + 80*sides), ARGS: 7, boxes, rounds, sides
%endmacro

View File

@ -0,0 +1,160 @@
/// Compresses one block into the five-word state. The state is run through `loop`
/// twice (first with sides = 1, then with sides = 0) starting from the same STATE,
/// and the two results are combined with the original STATE in `mix`.
///
/// _block is stored in memory: its address virt stays on the stack
/// def compress(STATE: 5, _block):
///
/// STATEL = STATE
/// STATEL = loop(STATEL)
///
/// STATER = STATE
/// STATER = loop(STATER)
///
/// return mix(STATER, STATEL, STATE)
///
///
/// def mix(STATER, STATEL, STATE):
/// return
/// u32(s1 + l2 + r3),
/// u32(s2 + l3 + r4),
/// u32(s3 + l4 + r0),
/// u32(s4 + l0 + r1),
/// u32(s0 + l1 + r2)
///
/// where si, li, ri, oi, VR, RD respectively denote
/// STATE[i], STATEL[i], STATER[i], OUTPUT[i], virt, retdest
global compress:
// stack: STATE, virt, retdest
PUSH switch
DUP7
// Initial loop variables: F = 0, K = 0 (both are overwritten by update_round_vars),
// boxes = 16, rounds = 5, sides = 1.
%stack () -> (0, 0, 16, 5, 1)
// stack: 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest
// Five DUP12s copy STATE (s4 first, s0 last) on top of the loop variables.
DUP12
DUP12
DUP12
DUP12
DUP12
// stack: STATE, 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest
%jump(loop)
switch:
// stack: STATEL, STATE, virt, retdest
PUSH mix
DUP12
// Second pass: boxes = 16, rounds = 5, sides = 0.
%stack () -> (16, 5, 0)
// stack: 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest
DUP15
DUP15
DUP15
DUP15
DUP15
// stack: STATE, 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest
// Insert the F = 0, K = 0 placeholders right below the copied STATE.
%stack (STATE: 5) -> (STATE, 0, 0)
// stack: STATE, 0, 0, 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest
%jump(loop)
mix:
// Each %add3_u32 below consumes the top three words and leaves one output word;
// the SWAPs in between line up the next (s, l, r) triple and park finished
// outputs, VR and RD further down the stack.
// stack: r0, r1, r2, r3, r4, l0, l1, l2, l3, l4, s0, s1, s2, s3, s4, VR, RD
SWAP10
// stack: s0, r1, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD
SWAP1
// stack: r1, s0, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD
SWAP6
// stack: l1, s0, r2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD
%add3_u32
// stack: o4, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD
SWAP14
// stack: RD, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, o4
SWAP11
// stack: s3, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, RD, s4, VR, o4
SWAP10
// stack: s2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4
SWAP1
// stack: r3, s2, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4
SWAP6
// stack: l3, s2, r4, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4
%add3_u32
// stack: o1, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4
SWAP9
// stack: RD, l0, r1, l2, r3, l4, r0, s1, s3, o1, s4, VR, o4
SWAP10
// stack: s4, l0, r1, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4
%add3_u32
// stack: o3, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4
SWAP9
// stack: VR, l2, r3, l4, r0, s1, s3, o1, RD, o3, o4
SWAP5
// stack: s1, l2, r3, l4, r0, VR, s3, o1, RD, o3, o4
%add3_u32
// stack: o0, l4, r0, VR, s3, o1, RD, o3, o4
SWAP4
// stack: s3, l4, r0, VR, o0, o1, RD, o3, o4
%add3_u32
// stack: o2, VR, o0, o1, RD, o3, o4
SWAP4
// stack: RD, VR, o0, o1, o2, o3, o4
SWAP1
// stack: VR, RD, o0, o1, o2, o3, o4
POP
// stack: RD, o0, o1, o2, o3, o4
JUMP
/// Drives the rounds of one pass: while rounds is nonzero, refresh F and K for the
/// current round and run 16 boxes; when rounds reaches zero, return STATE to retdest.
///
/// def loop(STATE: 5):
/// while rounds:
/// update_round_vars()
/// round(STATE: 5, F, K, rounds, sides)
///
/// def update_round_vars():
/// F = load(F)(sides, rounds)
/// K = load(K)(sides, rounds)
///
/// def round(STATE, rounds, sides):
/// while boxes:
/// box(STATE, F, K)
/// boxes -= 1
/// boxes = 16
/// rounds -= 1
loop:
// stack: STATE, F, K, 16, rounds, sides, virt, retdest
DUP9
// stack: rounds, STATE, F, K, 16, rounds, sides, virt, retdest
%jumpi(update_round_vars)
// stack: STATE, F, K, 16, 0, sides, virt, retdest
// No rounds left: drop all loop variables and return STATE to the caller.
%stack (STATE: 5, F, K, boxes, rounds, sides, virt, retdest) -> (retdest, STATE)
// stack: retdest, STATE
JUMP
update_round_vars:
// stack: STATE, F , K , 16, rounds, sides, virt, retdest
DUP9
DUP11
%get_round
DUP1
// stack: rnd, rnd, STATE, F , K , 16, rounds, sides, virt, retdest
// Swap one copy of rnd into the F slot, discard the old F, and let %push_f
// compute the new F from the remaining rnd.
SWAP7
POP
%push_f
SWAP7
// stack: rnd, rnd, STATE, F', K , 16, rounds, sides, virt, retdest
// Same trick for K: park rnd in the K slot, drop the old K, load the new one.
SWAP8
POP
// NOTE(review): rnd is passed unscaled while k_data entries are 4 bytes wide;
// presumably %mload_kernel_code_u32(label) scales the index itself -- confirm.
%mload_kernel_code_u32(k_data)
SWAP7
POP
// stack: STATE, F', K', 16, rounds, sides, virt, retdest
%jump(round)
global round:
// stack: STATE, F, K, boxes, rounds , sides, virt, retdest
DUP8
// stack: boxes, STATE, F, K, boxes, rounds , sides, virt, retdest
%jumpi(box)
// stack: STATE, F, K, 0, rounds , sides, virt, retdest
// boxes hit zero: reset it to 16 ...
SWAP7
POP
PUSH 16
SWAP7
// stack: STATE, F, K, 16, rounds , sides, virt, retdest
// ... and decrement rounds in place.
PUSH 1
DUP10
SUB
SWAP9
POP
// stack: STATE, F, K, 16, rounds-1, sides, virt, retdest
%jump(loop)

View File

@ -0,0 +1,117 @@
/// Additive constants K, one 4-byte big-endian word per round.
/// Entries 0..4 belong to the left line, entries 5..9 to the right line.
global k_data:
    // Left
    BYTES 0x00, 0x00, 0x00, 0x00    // 0x00000000
    BYTES 0x5a, 0x82, 0x79, 0x99    // 0x5a827999
    BYTES 0x6e, 0xd9, 0xeb, 0xa1    // 0x6ed9eba1
    BYTES 0x8f, 0x1b, 0xbc, 0xdc    // 0x8f1bbcdc
    BYTES 0xa9, 0x53, 0xfd, 0x4e    // 0xa953fd4e
    // Right
    BYTES 0x50, 0xa2, 0x8b, 0xe6    // 0x50a28be6
    BYTES 0x5c, 0x4d, 0xd1, 0x24    // 0x5c4dd124
    BYTES 0x6d, 0x70, 0x3e, 0xf3    // 0x6d703ef3
    BYTES 0x7a, 0x6d, 0x76, 0xe9    // 0x7a6d76e9
    BYTES 0x00, 0x00, 0x00, 0x00    // 0x00000000
/// Rotation amounts s, one byte per box: 16 boxes per round, five left
/// rounds followed by five right rounds (160 bytes in total).
global s_data:
    // Left, rounds 0..4
    BYTES 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8
    BYTES 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12
    BYTES 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5
    BYTES 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12
    BYTES 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6
    // Right, rounds 0..4
    BYTES 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6
    BYTES 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11
    BYTES 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5
    BYTES 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8
    BYTES 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
/// Message-word selectors r, one byte per box, stored as byte offsets into
/// the block (every entry is a multiple of 4). 16 boxes per round, five
/// left rounds followed by five right rounds.
global r_data:
    // Left, rounds 0..4
    BYTES 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
    BYTES 28, 16, 52, 4, 40, 24, 60, 12, 48, 0, 36, 20, 8, 56, 44, 32
    BYTES 12, 40, 56, 16, 36, 60, 32, 4, 8, 28, 0, 24, 52, 44, 20, 48
    BYTES 4, 36, 44, 40, 0, 32, 48, 16, 52, 12, 28, 60, 56, 20, 24, 8
    BYTES 16, 0, 20, 36, 28, 48, 8, 40, 56, 4, 12, 32, 44, 24, 60, 52
    // Right, rounds 0..4
    BYTES 20, 56, 28, 0, 36, 8, 44, 16, 52, 24, 60, 32, 4, 40, 12, 48
    BYTES 24, 44, 12, 28, 0, 52, 20, 40, 56, 60, 32, 48, 16, 36, 4, 8
    BYTES 60, 20, 4, 12, 28, 56, 24, 36, 44, 32, 48, 8, 40, 0, 16, 52
    BYTES 32, 24, 16, 4, 12, 44, 60, 0, 20, 48, 8, 52, 36, 28, 40, 56
    BYTES 48, 60, 40, 16, 4, 20, 32, 28, 24, 8, 52, 56, 0, 12, 36, 44

View File

@ -0,0 +1,150 @@
/// Rotate-left helper.
///
/// def rol(n, x):
///     return (u32(x << n)) | (x >> (32 - n))
///
/// Consumes n and x, then jumps to retdest with the result on top.
global rol:
    // stack: n, x, retdest
    DUP2
    DUP2
    // stack: n, x, n, x, retdest
    SHL
    // stack: x << n, n, x, retdest
    %as_u32
    // stack: u32(x << n), n, x, retdest
    SWAP2
    SWAP1
    // stack: n, x, u32(x << n), retdest
    PUSH 32
    SUB
    // stack: 32-n, x, u32(x << n), retdest
    SHR
    // stack: x >> (32-n), u32(x << n), retdest
    OR
    // stack: u32(x << n) | (x >> (32-n)), retdest
    SWAP1
    JUMP
// Pushes the code address of the round function selected by rnd, without branching:
// each %this_f adds F only when its index equals rnd, so exactly one term survives
// for rnd in 0..9 (for any other rnd the result stays 0).
//
// def push_f(rnd):
// Fs = [F0, F1, F2, F3, F4, F4, F3, F2, F1, F0]
// acc = 0
// for i, F in enumerate(Fs):
// acc += (i==rnd)*F
// return acc, rnd
//
// %this_f(i,F) enacts
// acc += (i==rnd)*F
%macro push_f
// stack: rnd
PUSH 0
%this_f(0,F0)
%this_f(1,F1)
%this_f(2,F2)
%this_f(3,F3)
%this_f(4,F4)
%this_f(5,F4)
%this_f(6,F3)
%this_f(7,F2)
%this_f(8,F1)
%this_f(9,F0)
// stack: F, rnd
%endmacro
// One selection step: acc += (rnd == i) * F, leaving rnd in place below acc.
// i is a constant round index and F a label whose address is used as a number.
%macro this_f(i, F)
// stack: acc, rnd
DUP2
// stack: rnd , acc, rnd
%eq_const($i)
// stack: rnd==i , acc, rnd
%mul_const($F)
// stack: (rnd==i)*F , acc, rnd
ADD
// stack: (rnd==i)*F + acc, rnd
%endmacro
/// def F0(x, y, z):
/// return x ^ y ^ z
///
/// Consumes x, y, z and jumps to retdest with the result on top.
global F0:
// stack: x , y , z, retdest
XOR
// stack: x ^ y , z, retdest
XOR
// stack: x ^ y ^ z, retdest
SWAP1
JUMP
/// def F1(x, y, z):
/// return (x & y) | (u32(~x) & z)
///
/// Consumes x, y, z and jumps to retdest with the result on top.
global F1:
// stack: x, y, z, retdest
// x is needed twice (once plain, once negated), so keep a copy.
DUP1
// stack: x, x, y, z, retdest
SWAP2
// stack: y, x, x, z, retdest
AND
// stack: y & x, x, z, retdest
SWAP2
// stack: z, x, y & x , retdest
SWAP1
// stack: x, z, y & x , retdest
%not_u32
// stack: ~x, z, y & x , retdest
AND
// stack: ~x & z , y & x , retdest
OR
// stack: (~x & z) | (y & x), retdest
SWAP1
JUMP
/// def F2(x, y, z):
/// return (x | u32(~y)) ^ z
///
/// Consumes x, y, z and jumps to retdest with the result on top.
global F2:
// stack: x , y, z, retdest
SWAP1
// stack: y , x, z, retdest
%not_u32
// stack: ~y , x , z, retdest
OR
// stack: ~y | x , z, retdest
XOR
// stack: (~y | x) ^ z, retdest
SWAP1
JUMP
/// def F3(x, y, z):
/// return (x & z) | (u32(~z) & y)
///
/// Consumes x, y, z and jumps to retdest with the result on top.
global F3:
// stack: x, y , z , retdest
// z is needed twice (once plain, once negated), so copy it first.
DUP3
// stack: z , x, y , z , retdest
AND
// stack: z & x, y , z , retdest
SWAP2
// stack: z, y, z & x , retdest
%not_u32
// stack: ~z , y, z & x , retdest
AND
// stack: ~z & y, z & x , retdest
OR
// stack: (~z & y) | (z & x), retdest
SWAP1
JUMP
/// def F4(x, y, z):
/// return x ^ (y | u32(~z))
///
/// Consumes x, y, z and jumps to retdest with the result on top.
global F4:
// stack: x, y, z, retdest
SWAP2
// stack: z, y, x, retdest
%not_u32
// stack: ~z, y, x, retdest
OR
// stack: ~z | y, x, retdest
XOR
// stack: (~z | y) ^ x, retdest
SWAP1
JUMP

View File

@ -0,0 +1,107 @@
/// Variables beginning with _ are in memory
///
/// def ripemd160(_input):
/// STATE, count, _buffer = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0], 0, [0]*64
/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, len(input) , bytes = _input )
/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, padlength(len(input)), bytes = [0x80]+[0]*63)
/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, 8, bytes = size(len(_input)))
/// return process(STATE)
///
/// ripemd is called on a stack with ADDR and length
/// ripemd_stack is called on a stack with length, followed by the input bytes
///
/// ripemd_update receives and return the stack in the form:
/// stack: STATE, count, length, virt
/// where virt is the virtual address of the bytes argument
///
/// Entry point for input passed on the stack. It only queues up the arguments that the
/// storage routines consume one after another (see the list after the jump).
/// The return destination is expected below INPUT: `process` jumps to whatever is
/// left on the stack once the input has been consumed.
global ripemd_stack:
// stack: length, INPUT
%stack (length) -> (64, length, 0x80, 63, length, length)
// stack: 64, length, 0x80, 63, length, length, INPUT
%jump(ripemd_storage) // stores the following into memory
// init _buffer at virt 0 [consumes 64]
// store _size at virt 64 [consumes length]
// store _padding at virt 72 [consumes 0x80, 63]
// store _input at virt 136 [consumes length]
// Entry point for input that already lives in memory at ADDR (three stack words).
// NOTE(review): store_padding, as visible in this view, always continues with
// store_input_stack, and nothing visible here jumps to store_input (the routine that
// reads from ADDR). Confirm how this entry point is meant to reach store_input.
global ripemd:
// stack: ADDR, length
%stack (ADDR: 3, length) -> (64, length, 0x80, 63, length, ADDR, length)
// stack: 64, length, 0x80, 63, length, ADDR, length
%jump(ripemd_storage) // stores the following into memory
// init _buffer at virt 0 [consumes 64]
// store _size at virt 64 [consumes length]
// store _padding at virt 72 [consumes 0x80, 63]
// store _input at virt 136 [consumes ADDR, length]
// Sets up the initial state and chains the three ripemd_update calls by pushing their
// continuation labels (ripemd_1, ripemd_2, process) as successive return destinations.
global ripemd_init:
// stack: length
%stack (length) -> ( 0, length, 136, ripemd_1, ripemd_2, process)
// stack: count = 0, length, virt = 136, ripemd_1, ripemd_2, process
%stack () -> (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0)
// stack: 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0, count, length, virt, LABELS
// First update: absorb the input stored at virt 136.
%jump(ripemd_update)
ripemd_1:
// Second update: absorb padlength(length) bytes of padding stored at virt 72.
// stack: STATE, count, length , virt , LABELS
DUP7
// stack: length, STATE, count, length , virt , LABELS
%padlength
// stack: padlength, STATE, count, length , virt , LABELS
SWAP7
POP
// stack: STATE, count, length = padlength, virt , LABELS
%stack (STATE: 5, count, length, virt) -> (STATE, count, length, 72)
// STATE, count, length , virt = 72, LABELS
%jump(ripemd_update)
ripemd_2:
// Third update: absorb the 8 size bytes stored at virt 64.
// stack: STATE, count, length , virt , LABELS
%stack (STATE: 5, count, length, virt) -> (STATE, count, 8, 64)
// stack: STATE, count, length = 8, virt = 64, LABELS
%jump(ripemd_update)
// Packs the five state words into one value and returns it:
//     result = a' << 128 | b' << 96 | c' << 64 | d' << 32 | e'
// where x' = reverse_bytes_u32(x). The remaining loop variables are discarded.
global process:
// stack: a , b, c, d, e, count, length, virt
%reverse_bytes_u32
%shl_const(128)
// stack: a', b, c, d, e, VARS
SWAP1
%reverse_bytes_u32
%shl_const(96)
OR
// stack: b' a', c, d, e, VARS
SWAP1
%reverse_bytes_u32
%shl_const(64)
OR
// stack: c' b' a', d, e, VARS
SWAP1
%reverse_bytes_u32
%shl_const(32)
OR
// stack: d' c' b' a', e, VARS
SWAP1
%reverse_bytes_u32
OR
// stack: e' d' c' b' a', VARS
// VARS = count, length, virt; the word below them is the caller's return destination.
%stack (result, VARS: 3, retdest) -> (retdest, result)
// stack: retdest, result
JUMP
/// Number of padding bytes needed so that, after padding, 8 bytes remain in the
/// final 64-byte block (the result is always in 1..64).
///
/// def padlength(length):
/// t = length % 64
/// return 56 + 64*(t > 55) - t
%macro padlength
// stack: count
%mod_const(64)
// stack: t = count % 64
PUSH 55
DUP2
// stack: t , 55 , t
GT
// stack: t > 55 , t
%mul_const(64)
%add_const(56)
// stack: 56 + 64*(t > 55), t
SUB
// stack: 56 + 64*(t > 55) - t
%endmacro

View File

@ -0,0 +1,137 @@
// First storage step: zero the 64-byte buffer at virt 0..63.
// The label doubles as the loop target of %store_zeros, which jumps back here
// until the counter on top of the stack reaches zero.
global ripemd_storage: // starts by initializing buffer
// stack: i [init: 64]
%store_zeros(64, ripemd_storage)
// stack: (empty)
%jump(store_size)
// Stores the message size in bits (length * 8) as 8 bytes at virt 64..71, least
// significant byte first, then writes the leading 0x80 padding byte at virt 72.
store_size:
// stack: length
%shl_const(3)
// In the comments below each letter stands for one byte of the bit length,
// "a" being the most significant.
// stack: abcdefgh
%extract_and_store_byte(64)
// stack: abcdefg
%extract_and_store_byte(65)
// stack: abcdef
%extract_and_store_byte(66)
// stack: abcde
%extract_and_store_byte(67)
// stack: abcd
%extract_and_store_byte(68)
// stack: abc
%extract_and_store_byte(69)
// stack: ab
%extract_and_store_byte(70)
// stack: a
%mstore_kernel_general(71)
// stack: 0x80 // padding has 0x80 in first position and zeros elsewhere
%mstore_kernel_general(72) // store first padding term here so as to avoid extra label
%jump(store_padding)
// Zeroes the remaining 63 padding bytes at virt 73..135 (offset = 136 - i for
// i = 63..1), then copies the input from the stack unless it is empty.
// The label doubles as the loop target of %store_zeros.
store_padding:
// stack: i [init 63], length
%store_zeros(136, store_padding)
// stack: length
DUP1
%jumpi(store_input_stack)
// Empty input: nothing to copy.
POP
%jump(ripemd_init)
// Copies the input bytes from the stack to memory starting at virt 136.
// rem counts the bytes still on the stack; the next byte goes to 136 + (length - rem).
// Entered only with rem > 0 (the zero check happens after each store).
store_input_stack:
// stack: rem, length, REM_INP
%stack (rem, length, head) -> (length, rem, 136, head, rem, length)
SUB
ADD
// stack: offset, byte, rem, length, REM_INP
%mstore_kernel_general
// stack: rem, length, REM_INP
%decrement
DUP1
// stack: rem - 1, rem - 1, length, REM_INP
%jumpi(store_input_stack)
// stack: 0, length
POP
%jump(ripemd_init)
// Copies the input from memory at ADDR (three stack words) to virt 136 onwards,
// one byte per iteration. rem counts the bytes left; the byte read from ADDR is
// written to 136 + (length - rem), and the third ADDR word is incremented each time.
// NOTE(review): no jump to this label is visible in this view -- confirm the caller.
store_input:
// stack: rem , ADDR , length
DUP4
DUP4
DUP4
MLOAD_GENERAL
// stack: byte, rem , ADDR , length
DUP2
DUP7
SUB
%add_const(136)
// stack: offset, byte, rem , ADDR , length
%mstore_kernel_general
// stack: rem , ADDR , length
%decrement
// stack: rem-1, ADDR , length
// Advance the last of the three ADDR words.
SWAP3
%increment
SWAP3
// stack: rem-1, ADDR+1, length
DUP1
%jumpi(store_input)
// stack: 0 , ADDR , length
%pop4
// stack: length
%jump(ripemd_init)
/// Copies `times` values from address get.. to address set.. and jumps to retdest.
///
/// def buffer_update(get, set, times):
/// for i in range(times):
/// buffer[set+i] = bytestring[get+i]
///
/// The copy happens before the counter is tested, so one value is always copied;
/// callers are expected to pass times >= 1.
global buffer_update:
// stack: get , set , times , retdest
DUP2
DUP2
// stack: get, set, get , set , times , retdest
%mupdate_kernel_general
// stack: get , set , times , retdest
%increment
SWAP1
%increment
SWAP1
SWAP2
%decrement
SWAP2
// stack: get+1, set+1, times-1, retdest
DUP3
%jumpi(buffer_update)
// stack: get , set , 0 , retdest
%pop3
JUMP
// Loop body that writes a zero at offset N - i and decrements i, jumping back to
// `label` while i is still nonzero. `label` must be placed directly before the macro
// invocation so that the jump re-enters this body. Zeroes offsets N - i_init .. N - 1
// and pops the counter when done.
%macro store_zeros(N, label)
// stack: i
%stack (i) -> ($N, i, 0, i)
SUB
// stack: offset = N-i, 0, i
%mstore_kernel_general
// stack: i
%decrement
DUP1
// stack: i-1, i-1
%jumpi($label)
// stack: 0
POP
%endmacro
// Splits the value on top of the stack into its lowest byte y and the rest xs
// (value = xs * 0x100 + y), stores y at `offset` and leaves xs on the stack.
%macro extract_and_store_byte(offset)
// stack: xsy
PUSH 0x100
DUP2
MOD
// stack: y, xsy
%stack (y, xsy) -> (xsy, y, 0x100, y)
// stack: xsy, y, 0x100, y
// (xsy - y) / 0x100 = xs
SUB
DIV
SWAP1
// stack: y, xs
%mstore_kernel_general($offset)
// stack: xs
%endmacro

View File

@ -0,0 +1,108 @@
/// Absorbs `length` bytes located at virt into the running hash: fills and compresses
/// the partially filled buffer if possible (update_1), compresses all further complete
/// 64-byte blocks straight from the input (update_2), and copies the leftover bytes
/// into the buffer (final_update).
///
/// ripemd_update will receive and return the stack in the form:
/// stack: STATE, count, length, virt
///
/// def ripemd_update(state, count, buffer, length, bytestring):
/// have = (count // 8) % 64
/// need = 64 - have
/// shift = 0
/// P = length >= need and have
/// Q = length >= need
/// if P:
/// update_1()
/// if Q:
/// update_2()
/// R = length > shift
/// if R:
/// buffer_update(virt + shift, have, length - shift)
///
/// return state, count + 8*length, buffer
global ripemd_update:
// stack: STATE, count, length, virt, retdest
%stack (STATE: 5, count, length, virt) -> (count, 8, 64, STATE, count, length, virt)
DIV
MOD
// stack: have, STATE, count, length, virt, retdest
DUP1
PUSH 64
SUB
PUSH 0
// stack: shift, need, have, STATE, count, length, virt, retdest
%stack (shift, need, have, STATE: 5, count, length) -> (length, need, STATE, shift, need, have, count, length)
// stack: length, need, STATE, shift, need, have, count, length, virt, retdest
// Q = not (length < need)
LT
ISZERO
// stack: Q, STATE, shift, need, have, count, length, virt, retdest
%stack (Q, STATE: 5, shift, need, have) -> (have, Q, Q, STATE, shift, need, have)
// P = (have > 0) and Q
%gt_const(0)
AND
// stack: P, Q, STATE, shift, need, have, count, length, virt, retdest
%jumpi(update_1)
// stack: Q, STATE, shift, need, have, count, length, virt, retdest
%jumpi(update_2)
final_update:
// stack: STATE, shift, need, have, count, length, virt, retdest
// Build the buffer_update call (get = virt + shift, set = have, times = length - shift,
// return to return_step) and take it only if R = length > shift.
%stack (STATE: 5, shift, need, have, count, length) -> (length, shift, return_step, STATE, shift, need, have, count, length)
SUB
// stack: ARGS: 2, STATE, shift, need, have, count, length, virt, retdest
%stack (ARGS: 2, STATE: 5, shift, need, have, count, length, virt) -> (shift, virt, have, ARGS, STATE, shift, need, have, count, length, virt)
ADD
// stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest
%stack (ARGS: 4, STATE: 5, shift, need, have, count, length) -> (length, shift, ARGS, STATE, shift, need, have, count, length)
GT
// stack: R, ARGS: 4, STATE, shift, need, have, count, length, virt, retdest
%jumpi(buffer_update)
// stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest
// Nothing left to copy: drop get/set/times and jump to the remaining ARG, return_step.
%pop3
JUMP
return_step:
// stack: STATE, shift, need, have, count, length, virt, retdest
// count += 8 * length, done in place by temporarily swapping count to the top.
SWAP8
DUP10
%mul_const(8)
ADD
SWAP8
// stack: STATE, shift, need, have, count, length, virt, retdest
%stack (STATE: 5, shift, need, have, count, length, virt, retdest) -> (retdest, STATE, count, length, virt)
JUMP
/// Tops up the partially filled buffer with the first `need` input bytes and
/// compresses it, then continues with update_2.
///
/// def update_1():
/// buffer_update(virt, have, need)
/// shift = need
/// have = 0
/// state = compress(state, buffer)
update_1:
// stack: Q, STATE, shift, need, have, count, length, virt, retdest
// Q is dropped here: this path is only taken when P holds, which implies Q.
%stack (Q, STATE: 5, shift, need, have, count, length, virt) -> (virt, have, need, update_1a, STATE, shift, need, have, count, length, virt)
%jump(buffer_update)
update_1a:
// stack: STATE, shift, need, have, count, length, virt, retdest
// compress is called with virt = 0 (the buffer) and returns to update_2.
%stack (STATE: 5, shift, need, have) -> (STATE, 0, update_2, need, need, 0)
// stack: STATE, 0, update_2, shift = need, need, have = 0, count, length, virt, retdest
%jump(compress)
/// Compresses every complete 64-byte block that is still available in the input,
/// reading it directly from virt + shift, then falls back to final_update.
///
/// def update_2():
/// while length >= shift + 64:
/// shift += 64
/// state = compress(state, bytestring[shift-64:])
update_2:
// stack: STATE, shift, need, have, count, length, virt, retdest
%stack (STATE: 5, shift, need, have, count, length) -> (64, shift, length, STATE, shift, need, have, count, length)
ADD
GT
// cond = (shift + 64 > length), i.e. no complete block is left
// stack: cond, STATE, shift, need, have, count, length, virt, retdest
%jumpi(final_update)
// shift += 64, done in place
SWAP5
%add_const(64)
SWAP5
%stack (STATE: 5, shift) -> (shift, 64, STATE, shift)
DUP13
ADD
SUB
// offset = virt + shift - 64, the start of the block to compress
// stack: offset, STATE, shift, need, have, count, length, virt, retdest
%stack (offset, STATE: 5) -> (STATE, offset, update_2)
// stack: STATE, offset, update_2, shift, need, have, count, length, virt, retdest
%jump(compress)

View File

@ -0,0 +1,285 @@
// We use memory starting at 320 * num_blocks + 2 (after the message schedule
// space) as scratch space to store stack values.
// Replaces num_blocks on top of the stack with that address.
%macro scratch_space_addr_from_num_blocks
// stack: num_blocks
%mul_const(320)
%add_const(2)
// stack: scratch_space_addr = 320 * num_blocks + 2
%endmacro
// Sets up the compression state: block counter i = 0, num_blocks read from memory
// address 0, the scratch-space address derived from it, and the eight initial hash
// words as the working variables a..h. Falls through into compression_start_block.
global sha2_compression:
// stack: message_schedule_addr, retdest
PUSH 0
// stack: i=0, message_schedule_addr, retdest
SWAP1
// stack: message_schedule_addr, i=0, retdest
PUSH 0
// stack: 0, message_schedule_addr, i=0, retdest
%mload_kernel_general
// stack: num_blocks, message_schedule_addr, i=0, retdest
DUP1
// stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest
%scratch_space_addr_from_num_blocks
// stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest
SWAP1
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
// Push the initial hash values; these constants are called H^(0) in the spec.
// They are pushed in reverse order so that H^(0)_0 ends up on top as a[0].
PUSH 0x5be0cd19 // H^(0)_7
PUSH 0x1f83d9ab // H^(0)_6
PUSH 0x9b05688c // H^(0)_5
PUSH 0x510e527f // H^(0)_4
PUSH 0xa54ff53a // H^(0)_3
PUSH 0x3c6ef372 // H^(0)_2
PUSH 0xbb67ae85 // H^(0)_1
PUSH 0x6a09e667 // H^(0)_0
// stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
compression_start_block:
// Store the current values of the working variables, as the "initial values" to be added back in at the end of this block.
// The eight words a..h are written as u32s to scratch_space_addr, scratch_space_addr+4, ..., scratch_space_addr+28;
// the working variables themselves stay on the stack unchanged.
DUP10
// stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP2
DUP2
// stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP3
DUP2
// stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP4
DUP2
// stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP5
DUP2
// stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP6
DUP2
// stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP7
DUP2
// stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP8
DUP2
// stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP9
DUP2
// stack: scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
// Drop the running store address; the base scratch_space_addr is still further down.
POP
// stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
compression_loop:
// One SHA-256 round: update the eight working variables, using the next constant K[i] and the next
// message schedule chunk W[i], then advance the loop state without branching:
//   i                     <- (i+1) % 64
//   num_blocks            <- num_blocks - (i+1 == 64)
//   message_schedule_addr <- message_schedule_addr + 256 * (i+1 == 64)
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP11
// stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP13
// stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mul_const(4)
// stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
ADD
// stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mload_kernel_general_u32
// stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
PUSH sha2_constants_k
// stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP14
// stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mul_const(4)
// stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
ADD
// stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mload_kernel_code_u32
// stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h)
// stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%sha2_temp_word1
// stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%stack (t, a, b, c) -> (a, b, c, t, a, b, c)
// stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%sha2_temp_word2
// stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP6
// stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP3
// stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP2
// stack: T1[i], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, g, h)
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP12
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%increment
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP1
// stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%eq_const(64)
// stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP1
// stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP12
// stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SUB
// stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP13
// stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
SWAP1
// stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
// Each block's message schedule occupies 64 words * 4 bytes = 256 bytes.
%mul_const(256)
// stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
ADD
// stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
SWAP12
// stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest
SWAP10
// stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
POP
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
%and_const(63)
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
SWAP12
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
POP
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
DUP12
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
// A nonzero round index means rounds remain in this block, so loop again. After the 64th round the
// index wraps to 0 and execution falls through into compression_end_block, which directly follows.
%jumpi(compression_loop)
compression_end_block:
// Reached after the 64th round of a block (the round index has wrapped back to 0).
// Add the initial values of the eight working variables (from the start of this block's compression) back into them.
// The initial values are read back from the scratch space, one 32-bit word every 4 bytes:
// a[0] at scratch_space_addr, b[0] at +4, ..., h[0] at +28.
// Each step adds one saved word to the value on top of the stack, then uses SWAPk to rotate the next
// unprocessed working variable to the top. scratch_space_addr stays at depth 10 throughout, hence the repeated DUP10.
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mload_kernel_general_u32
// stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP1
// stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, b[64], a[0]+a[64], ...
%add_const(4)
// stack: scratch_space_addr+4, b[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP2
// stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, c[64], a[0]+a[64], ...
%add_const(8)
// stack: scratch_space_addr+8, c[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP3
// stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, d[64], a[0]+a[64], ...
%add_const(12)
// stack: scratch_space_addr+12, d[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP4
// stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, e[64], a[0]+a[64], ...
%add_const(16)
// stack: scratch_space_addr+16, e[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP5
// stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, f[64], a[0]+a[64], ...
%add_const(20)
// stack: scratch_space_addr+20, f[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP6
// stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, g[64], a[0]+a[64], ...
%add_const(24)
// stack: scratch_space_addr+24, g[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP7
// stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, h[64], a[0]+a[64], ...
%add_const(28)
// stack: scratch_space_addr+28, h[64], a[0]+a[64], ...
%mload_kernel_general_u32
// stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP8
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
DUP1
// stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
ISZERO
// In this case, we've finished all the blocks.
%jumpi(compression_end)
// Otherwise more blocks remain: move num_blocks back below the eight updated words and start the next block.
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
%stack (num_blocks, working: 8) -> (working, num_blocks)
// stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%jump(compression_start_block)
compression_end:
// Every block has been compressed. Pack the eight 32-bit state words into one 256-bit digest
// (a in the most significant position, h in the least) and return it to the caller.
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
POP
// stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
// Seven shift-and-merge steps: each one makes room for, and absorbs, the next word down.
%shl_const(32)
OR
// stack: concat(a, b), c, d, e, f, g, h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: concat(a, b, c), d, e, f, g, h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: concat(a, ..., d), e, f, g, h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: concat(a, ..., e), f, g, h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: concat(a, ..., f), g, h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: concat(a, ..., g), h, scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
OR
// stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest
// Drop the bookkeeping values and bring the return address to the top.
%stack (digest, scratch_space_addr, message_schedule_addr, i, retdest) -> (retdest, digest)
// stack: retdest, sha2_result
JUMP

View File

@ -0,0 +1,65 @@
// The 64 SHA-256 round constants K[0..63] (FIPS 180-4, section 4.2.2), one per line,
// each stored as four bytes with the most significant byte first (e.g. K[0] = 0x428a2f98).
// compression_loop reads K[i] with %mload_kernel_code_u32 at sha2_constants_k + 4*i.
global sha2_constants_k:
// K[0..15]
BYTES 66, 138, 47, 152
BYTES 113, 55, 68, 145
BYTES 181, 192, 251, 207
BYTES 233, 181, 219, 165
BYTES 57, 86, 194, 91
BYTES 89, 241, 17, 241
BYTES 146, 63, 130, 164
BYTES 171, 28, 94, 213
BYTES 216, 7, 170, 152
BYTES 18, 131, 91, 1
BYTES 36, 49, 133, 190
BYTES 85, 12, 125, 195
BYTES 114, 190, 93, 116
BYTES 128, 222, 177, 254
BYTES 155, 220, 6, 167
BYTES 193, 155, 241, 116
// K[16..31]
BYTES 228, 155, 105, 193
BYTES 239, 190, 71, 134
BYTES 15, 193, 157, 198
BYTES 36, 12, 161, 204
BYTES 45, 233, 44, 111
BYTES 74, 116, 132, 170
BYTES 92, 176, 169, 220
BYTES 118, 249, 136, 218
BYTES 152, 62, 81, 82
BYTES 168, 49, 198, 109
BYTES 176, 3, 39, 200
BYTES 191, 89, 127, 199
BYTES 198, 224, 11, 243
BYTES 213, 167, 145, 71
BYTES 6, 202, 99, 81
BYTES 20, 41, 41, 103
// K[32..47]
BYTES 39, 183, 10, 133
BYTES 46, 27, 33, 56
BYTES 77, 44, 109, 252
BYTES 83, 56, 13, 19
BYTES 101, 10, 115, 84
BYTES 118, 106, 10, 187
BYTES 129, 194, 201, 46
BYTES 146, 114, 44, 133
BYTES 162, 191, 232, 161
BYTES 168, 26, 102, 75
BYTES 194, 75, 139, 112
BYTES 199, 108, 81, 163
BYTES 209, 146, 232, 25
BYTES 214, 153, 6, 36
BYTES 244, 14, 53, 133
BYTES 16, 106, 160, 112
// K[48..63]
BYTES 25, 164, 193, 22
BYTES 30, 55, 108, 8
BYTES 39, 72, 119, 76
BYTES 52, 176, 188, 181
BYTES 57, 28, 12, 179
BYTES 78, 216, 170, 74
BYTES 91, 156, 202, 79
BYTES 104, 46, 111, 243
BYTES 116, 143, 130, 238
BYTES 120, 165, 99, 111
BYTES 132, 200, 120, 20
BYTES 140, 199, 2, 8
BYTES 144, 190, 255, 250
BYTES 164, 80, 108, 235
BYTES 190, 249, 163, 247
BYTES 198, 113, 120, 242

View File

@ -0,0 +1,240 @@
// Maps num_blocks to the address where the message schedules are written: 64 * num_blocks + 2.
// The padded input occupies addresses 1 .. 64 * num_blocks, so this lies just past it.
%macro message_schedule_addr_from_num_blocks
// stack: num_blocks
PUSH 64
MUL
// stack: 64 * num_blocks
PUSH 2
ADD
// stack: 64 * num_blocks + 2
%endmacro
// Precondition: stack contains the address of one 64-byte message block, followed by the output address
// Postcondition: the 256 bytes starting at the given output address contain the 64 32-bit chunks
// of the message schedule (in four-byte increments)
// The first 16 chunks are the block itself; chunk t >= 16 is
// sigma_1(W[t-2]) + W[t-7] + sigma_0(W[t-15]) + W[t-16] (mod 2^32).
gen_message_schedule_from_block:
// stack: block_addr, output_addr, retdest
DUP1
// stack: block_addr, block_addr, output_addr, retdest
%add_const(32)
// stack: block_addr + 32, block_addr, output_addr, retdest
SWAP1
// stack: block_addr, block_addr + 32, output_addr, retdest
%mload_kernel_general_u256
// stack: block[0], block_addr + 32, output_addr, retdest
SWAP1
// stack: block_addr + 32, block[0], output_addr, retdest
%mload_kernel_general_u256
// stack: block[1], block[0], output_addr, retdest
SWAP2
// stack: output_addr, block[0], block[1], retdest
// Chunks are peeled off the low end of block[0], so they are stored from chunk 7 (offset 28) down to chunk 0.
%add_const(28)
PUSH 8
// stack: counter=8, output_addr + 28, block[0], block[1], retdest
// Execution falls through into the first loop.
gen_message_schedule_from_block_0_loop:
// Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule.
// stack: counter, output_addr, block[0], block[1], retdest
SWAP2
// stack: block[0], output_addr, counter, block[1], retdest
DUP1
// stack: block[0], block[0], output_addr, counter, block[1], retdest
%shr_const(32)
// stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest
SWAP1
// stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest
%as_u32
// stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
DUP3
// stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
%mstore_kernel_general_u32
// stack: block[0] >> 32, output_addr, counter, block[1], retdest
SWAP1
// stack: output_addr, block[0] >> 32, counter, block[1], retdest
%sub_const(4)
// stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest
SWAP1
// stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest
SWAP2
// stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest
%decrement
// stack: counter - 1, output_addr - 4, block[0] >> 32, block[1], retdest
DUP1
// Loop while chunks remain; once the counter hits 0, fall through to the end label.
%jumpi(gen_message_schedule_from_block_0_loop)
gen_message_schedule_from_block_0_end:
// Here output_addr is 4 bytes below the start of the output and block[0] has been shifted down to 0.
// stack: old counter=0, output_addr, block[0], block[1], retdest
POP
PUSH 8
// stack: counter=8, output_addr, block[0], block[1], retdest
%stack (counter, out, b0, b1) -> (out, counter, b1, b0)
// stack: output_addr, counter, block[1], block[0], retdest
// Move to chunk 15 (offset 60 from the start of the output), the last chunk taken from block[1].
%add_const(64)
// stack: output_addr + 64, counter, block[1], block[0], retdest
SWAP1
// stack: counter, output_addr + 64, block[1], block[0], retdest
gen_message_schedule_from_block_1_loop:
// Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule.
// stack: counter, output_addr, block[1], block[0], retdest
SWAP2
// stack: block[1], output_addr, counter, block[0], retdest
DUP1
// stack: block[1], block[1], output_addr, counter, block[0], retdest
%shr_const(32)
// stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest
SWAP1
// stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest
%as_u32
// stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
DUP3
// stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
%mstore_kernel_general_u32
// stack: block[1] >> 32, output_addr, counter, block[0], retdest
SWAP1
// stack: output_addr, block[1] >> 32, counter, block[0], retdest
%sub_const(4)
// stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest
SWAP1
// stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest
SWAP2
// stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest
%decrement
// stack: counter - 1, output_addr - 4, block[1] >> 32, block[0], retdest
DUP1
// Loop while chunks remain; once the counter hits 0, fall through to the end label.
%jumpi(gen_message_schedule_from_block_1_loop)
gen_message_schedule_from_block_1_end:
// Here output_addr points at chunk 7 (offset 28) and both block words have been shifted down to 0.
// stack: old counter=0, output_addr, block[1], block[0], retdest
POP
// The two leftover block words (both 0 by now) stay on the stack until the final %pop4; the
// comments below keep calling them block[0], block[1].
// stack: output_addr, block[0], block[1], retdest
PUSH 48
// stack: counter=48, output_addr, block[0], block[1], retdest
SWAP1
// stack: output_addr, counter, block[0], block[1], retdest
// Move to chunk 16 (offset 64 from the start of the output), the first derived chunk.
%add_const(36)
// stack: output_addr + 36, counter, block[0], block[1], retdest
SWAP1
// stack: counter, output_addr + 36, block[0], block[1], retdest
gen_message_schedule_remaining_loop:
// Generate the next 48 chunks of the message schedule, one at a time, from prior chunks.
// stack: counter, output_addr, block[0], block[1], retdest
SWAP1
// stack: output_addr, counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, counter, block[0], block[1], retdest
%sub_const(8)
// stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest
%sha2_sigma_1
// stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest
SWAP1
// stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%sub_const(28)
// stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
SWAP1
// stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%sub_const(60)
// stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%sha2_sigma_0
// stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
SWAP1
// stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%sub_const(64)
// stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
SWAP1
// stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
SWAP4
// stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
%add_u32
%add_u32
%add_u32
// stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
DUP2
// stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
%mstore_kernel_general_u32
// stack: output_addr, counter, block[0], block[1], retdest
%add_const(4)
// stack: output_addr + 4, counter, block[0], block[1], retdest
SWAP1
// stack: counter, output_addr + 4, block[0], block[1], retdest
%decrement
// stack: counter - 1, output_addr + 4, block[0], block[1], retdest
DUP1
// Loop while chunks remain; once the counter hits 0, fall through to the end label.
%jumpi(gen_message_schedule_remaining_loop)
gen_message_schedule_remaining_end:
// stack: counter=0, output_addr, block[0], block[1], retdest
%pop4
// stack: retdest
JUMP
// Precondition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63]
// stack contains output_addr
// Postcondition: starting at output_addr, set of 256 bytes per block
// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments)
// Control then passes to sha2_compression with (output_addr, retdest) on the stack.
// NOTE(review): the loop body runs before the counter is tested, so num_blocks is assumed to be >= 1
// (sha2_pad always produces at least one block) -- confirm for any other caller.
global sha2_gen_all_message_schedules:
// stack: output_addr, retdest
DUP1
// stack: output_addr, output_addr, retdest
PUSH 0
// stack: 0, output_addr, output_addr, retdest
%mload_kernel_general
// stack: num_blocks, output_addr, output_addr, retdest
PUSH 1
// stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest
gen_all_message_schedules_loop:
// Call gen_message_schedule_from_block(cur_addr, cur_output_addr), returning to gen_all_message_schedules_loop_end.
// stack: cur_addr, counter, cur_output_addr, output_addr, retdest
PUSH gen_all_message_schedules_loop_end
// stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest
DUP4
// stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
DUP3
// stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
%jump(gen_message_schedule_from_block)
gen_all_message_schedules_loop_end:
// stack: cur_addr, counter, cur_output_addr, output_addr, retdest
// Advance to the next 64-byte input block.
%add_const(64)
// stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest
SWAP1
%decrement
SWAP1
// stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest
// Advance to the next 256-byte message schedule.
SWAP2
%add_const(256)
SWAP2
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
DUP2
// stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
// Loop while blocks remain; once the counter hits 0, fall through to the end label.
%jumpi(gen_all_message_schedules_loop)
gen_all_message_schedules_end:
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
%pop3
// stack: output_addr, retdest
%jump(sha2_compression)

View File

@ -0,0 +1,130 @@
// Rotates a 32-bit value right by the constant $rot: (value >> rot) + ((value << (32 - rot)) mod 2^32).
// The two parts occupy disjoint bit ranges when value fits in 32 bits, so the ADD merges them.
%macro rotr(rot)
// stack: value
DUP1
// stack: value, value
PUSH $rot
// stack: rot, value, value
SHR
// stack: value >> rot, value
SWAP1
// stack: value, value >> rot
PUSH $rot
PUSH 32
// stack: 32, rot, value, value >> rot
SUB
// stack: 32 - rot, value, value >> rot
SHL
// stack: value << (32 - rot), value >> rot
%as_u32
// stack: (value << (32 - rot)) % (1 << 32), value >> rot
ADD
// stack: rotr(value, rot)
%endmacro
// Small sigma_0 of the SHA-256 message schedule: rotr(x, 7) xor rotr(x, 18) xor shr(x, 3).
%macro sha2_sigma_0
// stack: x
DUP1
DUP1
// stack: x, x, x
%rotr(7)
// stack: rotr(x, 7), x, x
SWAP1
// stack: x, rotr(x, 7), x
%rotr(18)
// stack: rotr(x, 18), rotr(x, 7), x
XOR
// stack: rotr(x, 18) xor rotr(x, 7), x
SWAP1
// stack: x, rotr(x, 18) xor rotr(x, 7)
%shr_const(3)
// stack: shr(x, 3), rotr(x, 18) xor rotr(x, 7)
XOR
// stack: sigma_0(x)
%endmacro
// Small sigma_1 of the SHA-256 message schedule: rotr(x, 17) xor rotr(x, 19) xor shr(x, 10).
%macro sha2_sigma_1
// stack: x
DUP1
DUP1
// stack: x, x, x
%rotr(17)
// stack: rotr(x, 17), x, x
SWAP1
// stack: x, rotr(x, 17), x
%rotr(19)
// stack: rotr(x, 19), rotr(x, 17), x
XOR
// stack: rotr(x, 19) xor rotr(x, 17), x
SWAP1
// stack: x, rotr(x, 19) xor rotr(x, 17)
%shr_const(10)
// stack: shr(x, 10), rotr(x, 19) xor rotr(x, 17)
XOR
// stack: sigma_1(x)
%endmacro
// Big Sigma_0 of the SHA-256 round function: rotr(x, 2) xor rotr(x, 13) xor rotr(x, 22).
%macro sha2_bigsigma_0
// stack: x
DUP1
DUP1
// stack: x, x, x
%rotr(2)
// stack: rotr(x, 2), x, x
SWAP1
// stack: x, rotr(x, 2), x
%rotr(13)
// stack: rotr(x, 13), rotr(x, 2), x
XOR
// stack: rotr(x, 13) xor rotr(x, 2), x
SWAP1
// stack: x, rotr(x, 13) xor rotr(x, 2)
%rotr(22)
// stack: rotr(x, 22), rotr(x, 13) xor rotr(x, 2)
XOR
// stack: Sigma_0(x)
%endmacro
// Big Sigma_1 of the SHA-256 round function: rotr(x, 6) xor rotr(x, 11) xor rotr(x, 25).
%macro sha2_bigsigma_1
// stack: x
DUP1
DUP1
// stack: x, x, x
%rotr(6)
// stack: rotr(x, 6), x, x
SWAP1
// stack: x, rotr(x, 6), x
%rotr(11)
// stack: rotr(x, 11), rotr(x, 6), x
XOR
// stack: rotr(x, 11) xor rotr(x, 6), x
SWAP1
// stack: x, rotr(x, 11) xor rotr(x, 6)
%rotr(25)
// stack: rotr(x, 25), rotr(x, 11) xor rotr(x, 6)
XOR
// stack: Sigma_1(x)
%endmacro
// Ch(x, y, z) = (x and y) or ((not x) and z): each bit of x selects the matching bit of y (1) or z (0).
%macro sha2_choice
// stack: x, y, z
%stack (x, y, z) -> (x, y, x, z)
// stack: x, y, x, z
AND
// stack: x and y, x, z
SWAP2
// stack: z, x, x and y
SWAP1
// stack: x, z, x and y
NOT
// stack: not x, z, x and y
AND
// stack: (not x) and z, x and y
OR
// stack: Ch(x, y, z)
%endmacro
// Maj(x, y, z) = (x and y) or (x and z) or (y and z): each result bit is the majority vote of the three inputs.
%macro sha2_majority
// stack: x, y, z
DUP3
DUP2
// stack: x, z, x, y, z
AND
// stack: x and z, x, y, z
SWAP3
// stack: z, x, y, x and z
DUP3
// stack: y, z, x, y, x and z
AND
// stack: y and z, x, y, x and z
SWAP2
// stack: y, x, y and z, x and z
AND
// stack: x and y, y and z, x and z
OR
OR
// stack: Maj(x, y, z)
%endmacro

View File

@ -0,0 +1,89 @@
// SHA-256 entry point.
// Pre stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
// Stores num_bytes at address 0 and the input bytes at addresses 1..num_bytes of the kernel general
// segment, then continues with sha2_pad, leaving only retdest on the stack.
global sha2:
// sha2_store is the very next instruction, so execution simply falls through to it (no jump needed).
global sha2_store:
// stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
DUP1
// stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
PUSH 0
// stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
%mstore_kernel_general
// stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
PUSH 1
// stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest
store_loop:
// Pops one input byte per iteration and writes it to addr. The counter is tested first, so an empty input is handled.
// stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
DUP2
// stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
ISZERO
%jumpi(store_end)
// stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
%stack (addr, counter, val) -> (addr, val, counter, addr)
// stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest
%mstore_kernel_general
// stack: counter, addr, ... , x[num_bytes-1], retdest
%decrement
// stack: counter-1, addr, ... , x[num_bytes-1], retdest
SWAP1
// stack: addr, counter-1, ... , x[num_bytes-1], retdest
%increment
// stack: addr+1, counter-1, ... , x[num_bytes-1], retdest
%jump(store_loop)
store_end:
// stack: addr, counter, retdest
%pop2
// stack: retdest
%jump(sha2_pad)
// Precondition: input is in memory, starting at 0 of kernel general segment, of the form
// num_bytes, x[0], x[1], ..., x[num_bytes - 1]
// Postcondition: output is in memory, starting at 0, of the form
// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63]
// Control then passes to sha2_gen_all_message_schedules with (message_schedule_addr, retdest) on the stack.
// NOTE(review): only the 0x80 marker and the 8 length bytes are written here; the padding bytes in
// between are presumably expected to already be zero -- confirm that this memory region is fresh on every call.
global sha2_pad:
// stack: retdest
PUSH 0
%mload_kernel_general
// stack: num_bytes, retdest
// STEP 1: append 1
// insert 128 (= 1 << 7) at address num_bytes+1, i.e. right after the last input byte
PUSH 128
// stack: 128, num_bytes, retdest
DUP2
// stack: num_bytes, 128, num_bytes, retdest
%increment
// stack: num_bytes+1, 128, num_bytes, retdest
%mstore_kernel_general
// stack: num_bytes, retdest
// STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1
// (room for the input, the marker byte and the 8-byte length, rounded up to whole 64-byte blocks)
DUP1
// stack: num_bytes, num_bytes, retdest
%add_const(8)
%div_const(64)
%increment
// stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest
// STEP 3: calculate length := num_bytes*8 (the message length in bits)
SWAP1
// stack: num_bytes, num_blocks, retdest
%mul_const(8)
// stack: length = num_bytes*8, num_blocks, retdest
// STEP 4: write length to the last eight bytes of the last block, addresses num_blocks*64-7..num_blocks*64
DUP2
// stack: num_blocks, length, num_blocks, retdest
%mul_const(64)
// stack: last_addr = num_blocks*64, length, num_blocks, retdest
%sha2_write_length
// stack: num_blocks, retdest
DUP1
// stack: num_blocks, num_blocks, retdest
// STEP 5: write num_blocks to address 0 (replacing num_bytes)
PUSH 0
%mstore_kernel_general
// stack: num_blocks, retdest
%message_schedule_addr_from_num_blocks
// stack: message_schedule_addr, retdest
%jump(sha2_gen_all_message_schedules)

View File

@ -0,0 +1,32 @@
// "T_1" in the SHA-256 spec: h + Sigma_1(e) + Ch(e, f, g) + K[i] + W[i] (mod 2^32)
%macro sha2_temp_word1
// stack: e, f, g, h, K[i], W[i]
%stack (e, f, g) -> (e, f, g, e)
// stack: e, f, g, e, h, K[i], W[i]
%sha2_choice
// stack: Ch(e, f, g), e, h, K[i], W[i]
SWAP1
// stack: e, Ch(e, f, g), h, K[i], W[i]
%sha2_bigsigma_1
// stack: Sigma_1(e), Ch(e, f, g), h, K[i], W[i]
// Fold the five terms together, one 32-bit addition at a time.
%add_u32
// stack: Sigma_1(e) + Ch(e, f, g), h, K[i], W[i]
%add_u32
// stack: Sigma_1(e) + Ch(e, f, g) + h, K[i], W[i]
%add_u32
// stack: Sigma_1(e) + Ch(e, f, g) + h + K[i], W[i]
%add_u32
// stack: Sigma_1(e) + Ch(e, f, g) + h + K[i] + W[i]
%endmacro
// "T_2" in the SHA-256 spec: Sigma_0(a) + Maj(a, b, c) (mod 2^32)
%macro sha2_temp_word2
// stack: a, b, c
%stack (a, b, c) -> (a, b, c, a)
// stack: a, b, c, a
%sha2_majority
// stack: Maj(a, b, c), a
SWAP1
// stack: a, Maj(a, b, c)
%sha2_bigsigma_0
// stack: Sigma_0(a), Maj(a, b, c)
%add_u32
// stack: Sigma_0(a) + Maj(a, b, c)
%endmacro

View File

@ -0,0 +1,119 @@
// Helper for sha2_write_length: stores the least significant byte of the value on top of the
// stack at addr, leaving the stack unchanged.
%macro sha2_write_length_byte
// stack: length, addr
DUP1
// stack: length, length, addr
%and_const(0xff)
// stack: length % (1 << 8), length, addr
DUP3
// stack: addr, length % (1 << 8), length, addr
%mstore_kernel_general
// stack: length, addr
%endmacro

// Helper for sha2_write_length: steps to the next lower address and discards the byte just written.
%macro sha2_write_length_advance
// stack: length, addr
SWAP1
%decrement
SWAP1
// stack: length, addr - 1
%shr_const(8)
// stack: length >> 8, addr - 1
%endmacro

// Writes the low 64 bits of length into the eight bytes ending at last_addr, least significant byte
// at last_addr and most significant byte at last_addr - 7. Consumes both stack arguments.
%macro sha2_write_length
// stack: last_addr, length
SWAP1
// stack: length, last_addr
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 8, last_addr - 1
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 16, last_addr - 2
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 24, last_addr - 3
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 32, last_addr - 4
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 40, last_addr - 5
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 48, last_addr - 6
%sha2_write_length_byte
%sha2_write_length_advance
// stack: length >> 56, last_addr - 7
%sha2_write_length_byte
// stack: length >> 56, last_addr - 7
%pop2
// stack: (empty)
%endmacro