optimizations with rep

This commit is contained in:
Nicholas Ward 2023-03-16 14:58:31 -07:00
parent 92ee77869e
commit 38f79e4991
2 changed files with 22 additions and 150 deletions

View File

@ -132,53 +132,11 @@ compression_end_block:
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
PUSH 0
// stack: 0, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
SWAP13
// stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 0, b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
%add_u32
// stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 0, b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
SWAP12
// stack: 0, b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
SWAP13
// stack: b[0], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], 0, c[0], d[0], e[0], f[0], g[0], h[0], retdest
%add_u32
// stack: b[0]+b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], 0, c[0], d[0], e[0], f[0], g[0], h[0], retdest
SWAP12
// stack: 0, c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0], d[0], e[0], f[0], g[0], h[0], retdest
SWAP13
// stack: c[0], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], 0, d[0], e[0], f[0], g[0], h[0], retdest
%add_u32
// stack: c[0]+c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], 0, d[0], e[0], f[0], g[0], h[0], retdest
SWAP12
// stack: 0, d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0], e[0], f[0], g[0], h[0], retdest
SWAP13
// stack: d[0], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], 0, e[0], f[0], g[0], h[0], retdest
%add_u32
// stack: d[0]+d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], 0, e[0], f[0], g[0], h[0], retdest
SWAP12
// stack: 0, e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0], f[0], g[0], h[0], retdest
SWAP13
// stack: e[0], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], 0, f[0], g[0], h[0], retdest
%add_u32
// stack: e[0]+e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], 0, f[0], g[0], h[0], retdest
SWAP12
// stack: 0, f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0], g[0], h[0], retdest
SWAP13
// stack: f[0], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], 0, g[0], h[0], retdest
%add_u32
// stack: f[0]+f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], 0, g[0], h[0], retdest
SWAP12
// stack: 0, g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0], h[0], retdest
SWAP13
// stack: g[0], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], 0, h[0], retdest
%add_u32
// stack: g[0]+g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], 0, h[0], retdest
SWAP12
// stack: 0, h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0], retdest
SWAP13
// stack: h[0], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 0, retdest
%add_u32
// stack: h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 0, retdest
SWAP12
%rep 8
SWAP13
%add_u32
SWAP12
%endrep
// stack: 0, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
POP
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest

View File

@ -10,110 +10,24 @@
// stack: last_addr, length % (1 << 8), length, last_addr
%mstore_kernel_general
// stack: length, last_addr
SWAP1
%decrement
SWAP1
// stack: length, last_addr - 1
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 8, last_addr - 1
DUP1
// stack: length >> 8, length >> 8, last_addr - 1
%and_const(0xff)
// stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1
DUP3
// stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1
%mstore_kernel_general
// stack: length >> 8, last_addr - 1
SWAP1
%decrement
SWAP1
// stack: length >> 8, last_addr - 2
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 16, last_addr - 2
DUP1
// stack: length >> 16, length >> 16, last_addr - 2
%and_const(0xff)
// stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2
DUP3
// stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2
%mstore_kernel_general
%rep 7
// For i = 0 to 6
// stack: length >> (8 * i), last_addr - i - 1
SWAP1
%decrement
SWAP1
// stack: length >> (8 * i), last_addr - i - 2
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> (8 * (i + 1)), last_addr - i - 2
DUP1
// stack: length >> (8 * (i + 1)), length >> (8 * (i + 1)), last_addr - i - 2
%mod_const(256)
// stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2
DUP3
// stack: last_addr - i - 2, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2
%mstore_kernel_general
%endrep
// stack: length >> 16, last_addr - 2
SWAP1
%decrement
SWAP1
// stack: length >> 16, last_addr - 3
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 24, last_addr - 3
DUP1
// stack: length >> 24, length >> 24, last_addr - 3
%and_const(0xff)
// stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3
DUP3
// stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3
%mstore_kernel_general
// stack: length >> 24, last_addr - 3
SWAP1
%decrement
SWAP1
// stack: length >> 24, last_addr - 4
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 32, last_addr - 4
DUP1
// stack: length >> 32, length >> 32, last_addr - 4
%and_const(0xff)
// stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4
DUP3
// stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4
%mstore_kernel_general
// stack: length >> 32, last_addr - 4
SWAP1
%decrement
SWAP1
// stack: length >> 32, last_addr - 5
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 40, last_addr - 5
DUP1
// stack: length >> 40, length >> 40, last_addr - 5
%and_const(0xff)
// stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5
DUP3
// stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5
%mstore_kernel_general
// stack: length >> 40, last_addr - 5
SWAP1
%decrement
SWAP1
// stack: length >> 40, last_addr - 6
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 48, last_addr - 6
DUP1
// stack: length >> 48, length >> 48, last_addr - 6
%and_const(0xff)
// stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6
DUP3
// stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6
%mstore_kernel_general
// stack: length >> 48, last_addr - 6
SWAP1
%decrement
SWAP1
// stack: length >> 48, last_addr - 7
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> 56, last_addr - 7
DUP1
// stack: length >> 56, length >> 56, last_addr - 7
%and_const(0xff)
// stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7
DUP3
// stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7
%mstore_kernel_general
%pop2
// stack: (empty)
%endmacro