mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-02 13:13:07 +00:00
209 lines
7.0 KiB
Nim
209 lines
7.0 KiB
Nim
|
|
# Constantine
|
||
|
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||
|
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||
|
|
# Licensed and distributed under either of
|
||
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||
|
|
|
||
|
|
import
|
||
|
|
std/[macros, strutils],
|
||
|
|
./llvm
|
||
|
|
|
||
|
|
# ############################################################
|
||
|
|
#
|
||
|
|
# x86 Inline ASM
|
||
|
|
#
|
||
|
|
# ############################################################
|
||
|
|
|
||
|
|
macro genInstr(body: untyped): untyped =
  ## Generates one exported LLVM-builder proc per `op` declaration in `body`.
  ##
  ## Each declaration has the shape
  ##
  ##   op procName: (mnemonic, asmOperands, constraints, [operand, ...])
  ##
  ## where the first three tuple fields are string literals and the last one
  ## is a bracket holding the operand identifiers, which become the
  ## parameters of the generated proc.
  ##
  ## The generated proc looks like
  ##
  ##   proc procName*(builder: BuilderRef, <operands>,
  ##                  name: cstring = ""): ValueRef
  ##
  ## Its body derives the register type from the first operand, builds an
  ## AT&T-dialect inline-assembly value with `getInlineAsm` (mnemonic
  ## suffixed with "q" when the register width is 64 bits, "l" otherwise)
  ## and calls it through `builder.call2`.
  ##
  ## Only two-operand declarations whose constraint string starts with an
  ## output constraint ("=") are accepted; anything else is rejected at
  ## compile time.
  result = newStmtList()

  body.expectKind(nnkStmtList)
  for op in body:
    op.expectKind(nnkCommand)
    doAssert op[0].eqIdent"op"

    # -- Validate the declaration before reading any of its fields, so that
    #    a malformed `op` produces a diagnostic rather than an index error.
    let instrName = op[1]
    let spec = op[2][0]
    spec.expectKind(nnkTupleConstr)
    spec.expectLen(4)
    spec[0].expectKind(nnkStrLit)
    spec[1].expectKind(nnkStrLit)
    spec[2].expectKind(nnkStrLit)
    spec[3].expectKind(nnkBracket)

    let instr = spec[0]
    let instrParam = spec[1]
    let constraintString = spec[2]
    let operands = spec[3]

    let arity = operands.len
    if arity != 2:
      error("Unsupported arity: " & $arity, operands)

    let constraintStr = constraintString.strVal
    if not constraintStr.startsWith('='):
      # We only support out of place "=" function.
      # In-place with "+" requires alloca + load/stores in codegen
      # in-place functions can be rewritten to be out-place
      # with "matching constraints"
      error("Unsupported constraint: " & constraintStr, constraintString)

    # Identifiers shared between the quoted fragments below. They are
    # spliced with backticks everywhere so that every fragment refers to
    # the same variable of the generated proc.
    let numBits = ident"numBits"
    let regTy = ident"regTy"
    let fnTy = ident"fnTy"
    let ctx = ident"ctx"
    let inlineASM = ident"inlineASM"
    let asmString = ident"asmString"
    let lhs = operands[0]

    let instrBody = newStmtList()

    # 1. Detect the size of registers
    instrBody.add quote do:
      let `ctx` = builder.getContext()
      # lhs: ValueRef or uint32 or uint64
      let `numBits` =
        when `lhs` is ValueRef|ConstValueRef:
          `lhs`.getTypeOf().getIntTypeWidth()
        else:
          8*sizeof(`lhs`)
      let `regTy` =
        when `lhs` is ValueRef|ConstValueRef:
          `lhs`.getTypeOf()
        elif `lhs` is uint32:
          `ctx`.int32_t()
        elif `lhs` is uint64:
          `ctx`.int64_t()
        else:
          {.error: "Unsupported input type " & $typeof(`lhs`).}

    # 2. Create the LLVM asm signature
    #    A constraint string ending in 'r' takes the second operand in a
    #    register; otherwise it is passed as a pointer to the register type.
    if constraintStr.endsWith('r'):
      instrBody.add quote do:
        let `fnTy` = function_t(`regTy`, [`regTy`, `regTy`])
    else:
      instrBody.add quote do:
        let `fnTy` = function_t(`regTy`, [`regTy`, pointer_t(`regTy`)])

    # 3. Nothing, we can use the constraint string as is on x86

    # 4. Register the inline ASM with LLVM
    #    `numBits` is spliced by identifier so that it refers to the `let`
    #    emitted in step 1 and not to a variable of this macro.
    instrBody.add quote do:
      let `asmString` =
        if `numBits` == 64:
          static(`instr` & "q") & static(" " & `instrParam`)
        else:
          static(`instr` & "l") & static(" " & `instrParam`)

    instrBody.add quote do:
      let `inlineASM` = getInlineAsm(
        ty = `fnTy`,
        asmString = `asmString`,
        constraints = `constraintString`,
        # All carry/overflow instructions have side effects on the carry
        # flag and can't be reordered.
        # However, function calls can't be reordered.
        # Relevant operations that affect flags are:
        # - MUL, if the compiler decides not to use MULX
        # - XOR, for zeroing a register
        # NOTE(review): the flag is nevertheless left at 0 here - confirm
        # this is intended.
        hasSideEffects = LlvmBool(0),
        isAlignStack = LlvmBool(0),
        dialect = InlineAsmDialectATT,
        canThrow = LlvmBool(0))

    # 5. Call it
    #    Each operand is passed as
    #      ValueRef(when operand is AnyValueRef: operand
    #               else: constInt(regTy, uint64(operand)))
    let opArray = nnkBracket.newTree()
    for operand in operands:
      opArray.add newCall(
        bindSym"ValueRef",
        nnkWhenStmt.newTree(
          nnkElifBranch.newTree(
            nnkInfix.newTree(ident"is", operand, bindSym"AnyValueRef"),
            operand),
          nnkElse.newTree(
            newCall(ident"constInt", regTy, newCall(ident"uint64", operand)))
        )
      )
    # builder.call2(ty, inlineASM, [lhs, rhs], name)
    instrBody.add newCall(
      ident"call2", ident"builder", fnTy,
      inlineASM, opArray, ident"name")

    # 6. Create the function signature
    var opDefs: seq[NimNode]
    opDefs.add ident"ValueRef" # Return type
    opDefs.add newIdentDefs(ident"builder", bindSym"BuilderRef")
    block:
      # `i` walks the operand list; output ("=...") and "~{memory}" entries
      # of the constraint string do not consume an operand.
      var i = 0
      for constraint in constraintStr.split(','):
        if constraint.startsWith('=') or constraint.startsWith("~{memory}"):
          # Don't increment i
          continue
        elif constraint == "m":
          opDefs.add newIdentDefs(operands[i], ident"ValueRef")
        elif constraint.endsWith('r') or constraint.endsWith('0'):
          # AnyValueRef or uint32 or uint64
          opDefs.add newIdentDefs(
            operands[i],
            nnkInfix.newTree(ident"or",
              nnkInfix.newTree(ident"or", ident"AnyValueRef", ident"uint32"),
              ident"uint64")
          )
        else:
          error "Unsupported constraint: " & constraint
        i += 1
    opDefs.add newIdentDefs(ident"name", bindSym"cstring", newLit"")

    result.add newProc(
      name = nnkPostfix.newTree(ident"*", instrName),
      params = opDefs,
      procType = nnkProcDef,
      body = instrBody)
|
||
|
|
|
||
|
|
# Inline x86 assembly
|
||
|
|
# ------------------------------------------------------------
|
||
|
|
#
|
||
|
|
# We can generate add with carry via
|
||
|
|
# call { i8, i64 } @llvm.x86.addcarry.64(i8 %carryIn, i64 %a, i64 %b)
|
||
|
|
#
|
||
|
|
# We can generate multi-precision mul and mulx via
|
||
|
|
#
|
||
|
|
# define {i64, i64} @mul(i64 %x, i64 %y) #0 {
|
||
|
|
#
|
||
|
|
# %1 = zext i64 %x to i128
|
||
|
|
# %2 = zext i64 %y to i128
|
||
|
|
# %r = mul i128 %1, %2
|
||
|
|
# %3 = zext i32 64 to i128
|
||
|
|
# %4 = lshr i128 %r, %3
|
||
|
|
# %hi = trunc i128 %4 to i64
|
||
|
|
# %lo = trunc i128 %r to i64
|
||
|
|
#
|
||
|
|
# %res_tmp = insertvalue {i64, i64} undef, i64 %hi, 0
|
||
|
|
# %res = insertvalue {i64, i64} %res_tmp, i64 %lo, 1
|
||
|
|
#
|
||
|
|
# ret {i64, i64} %res
|
||
|
|
# }
|
||
|
|
#
|
||
|
|
# attributes #0 = {"target-features"="+bmi2"}
|
||
|
|
#
|
||
|
|
# mul:
|
||
|
|
# mov rax, rdi
|
||
|
|
# mul rsi
|
||
|
|
# mov rcx, rax
|
||
|
|
# mov rax, rdx
|
||
|
|
# mov rdx, rcx
|
||
|
|
# ret
|
||
|
|
#
|
||
|
|
# mul_bmi2:
|
||
|
|
# mov rdx, rdi
|
||
|
|
# mulx rax, rdx, rsi
|
||
|
|
# ret
|
||
|
|
#
|
||
|
|
# Note that mul(hi: var rdx, lo: var rax, a: reg/mem64, b: rax)
|
||
|
|
# - clobbers carry (and many other) flags
|
||
|
|
# - has fixed output to rdx:rax registers
|
||
|
|
# while mulx(hi: var reg64, lo: var reg64, a: reg/mem64, b: rdx)
|
||
|
|
# - does not clobber flags
|
||
|
|
# - has flexible register outputs
|
||
|
|
|
||
|
|
|
||
|
|
genInstr:
  # Only ADCX/ADOX are declared here: they have no intrinsics and cannot be
  # obtained through instruction combining, whereas plain ADC can be
  # generated from llvm.x86.addcarry.u64.
  #
  # Semantics: (cf/of, r) <- a+b+(cf/of)
  #
  # Columns: mnemonic, asm operands, constraint string, proc operands.
  op adcx_rr: ("adcx", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adcx_rm: ("adcx", "%2, %0;", "=r,0,m",  [lhs, rhs])
  op adox_rr: ("adox", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adox_rm: ("adox", "%2, %0;", "=r,0,m",  [lhs, rhs])
|