[Research] x86 code generator (#234)
* rename compilers -> intrinsics, math_gpu -> math_codegen * stash x86 codegen in research
This commit is contained in:
parent
c6d9a213f2
commit
33c3a2e8c4
|
@ -7,7 +7,7 @@
|
|||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../platforms/gpu/[llvm, nvidia, ir]
|
||||
../platforms/code_generator/[llvm, nvidia, ir]
|
||||
|
||||
# ############################################################
|
||||
#
|
|
@ -6,7 +6,7 @@
|
|||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import ./compilers/bitops
|
||||
import ./intrinsics/bitops
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
|
|
|
@ -24,7 +24,7 @@ type
|
|||
ctx*: ContextRef
|
||||
module*: ModuleRef
|
||||
builder*: BuilderRef
|
||||
i1_t*, i32_t*, i64_t*, void_t*: TypeRef
|
||||
i1_t*, i32_t*, i64_t*, i128_t*, void_t*: TypeRef
|
||||
backend*: Backend
|
||||
|
||||
Backend* = enum
|
||||
|
@ -54,7 +54,8 @@ proc new*(T: type Assembler_LLVM, backend: Backend, moduleName: cstring): Assemb
|
|||
result.builder = result.ctx.createBuilder()
|
||||
result.i1_t = result.ctx.int1_t()
|
||||
result.i32_t = result.ctx.int32_t()
|
||||
result.i64_t = result.ctx.int32_t()
|
||||
result.i64_t = result.ctx.int64_t()
|
||||
result.i128_t = result.ctx.int128_t()
|
||||
result.void_t = result.ctx.void_t()
|
||||
result.backend = backend
|
||||
|
|
@ -143,6 +143,8 @@ macro genInstr(body: untyped): untyped =
|
|||
|
||||
# We could have generic constraint string generation, but we only have 2 arities to support
|
||||
# and codegen without quote do would be even more verbose and hard to read.
|
||||
|
||||
# TODO: commutative inputs
|
||||
if arity == 2:
|
||||
let op0 = operands[0]
|
||||
let op1 = operands[1]
|
||||
|
@ -217,8 +219,7 @@ macro genInstr(body: untyped): untyped =
|
|||
hasSideEffects = LlvmBool(0),
|
||||
isAlignStack = LlvmBool(0),
|
||||
dialect = InlineAsmDialectATT,
|
||||
canThrow = LlvmBool(0)
|
||||
)
|
||||
canThrow = LlvmBool(0))
|
||||
|
||||
# 5. Call it
|
||||
let opArray = nnkBracket.newTree()
|
||||
|
@ -235,8 +236,7 @@ macro genInstr(body: untyped): untyped =
|
|||
# builder.call2(ty, inlineASM, [lhs, rhs], name)
|
||||
instrBody.add newCall(
|
||||
ident"call2", ident"builder", fnTy,
|
||||
inlineASM, opArray, ident"name"
|
||||
)
|
||||
inlineASM, opArray, ident"name")
|
||||
|
||||
# 6. Create the function signature
|
||||
var opDefs: seq[NimNode]
|
||||
|
@ -273,8 +273,7 @@ macro genInstr(body: untyped): untyped =
|
|||
name = nnkPostfix.newTree(ident"*", instrName),
|
||||
params = opDefs,
|
||||
procType = nnkProcDef,
|
||||
body = instrBody
|
||||
)
|
||||
body = instrBody)
|
||||
|
||||
# Inline PTX assembly
|
||||
# ------------------------------------------------------------
|
|
@ -13,7 +13,7 @@ import
|
|||
multiplexers,
|
||||
ct_division
|
||||
],
|
||||
compilers/[
|
||||
intrinsics/[
|
||||
addcarry_subborrow,
|
||||
extended_precision,
|
||||
compiler_optim_hints
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
--path:../../constantine/platforms/code_generator
|
|
@ -0,0 +1,95 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
./bindings/c_abi,
|
||||
./llvm, ./ir,
|
||||
./x86_inlineasm,
|
||||
../primitives
|
||||
|
||||
export x86_inlineasm
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# x86 API
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
proc defMulExt*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Define the extended-precision multiplication kernel in `asy.module`.
  ##
  ## The generated function is named `hw_mulExt64` (`wordSize == 64`) or
  ## `hw_mulExt32` (`wordSize == 32`). It takes two single-word integers,
  ## zero-extends both of them to the double-word type and returns their
  ## double-word product.
  ##
  ## Returns the function type together with the function value.
  ##
  ## Side effect: `asy.builder` is left positioned at the end of the
  ## basic block that was appended to the new function.
  ##
  ## Only word sizes of 32 and 64 bits are supported; any other value
  ## used to silently produce the 32-bit kernel and is now rejected.
  doAssert wordSize == 32 or wordSize == 64,
    "defMulExt: unsupported word size " & $wordSize & ", expected 32 or 64"

  let is64 = wordSize == 64

  let procName = if is64: cstring"hw_mulExt64"
                 else: cstring"hw_mulExt32"

  # Inputs are single words, the result is a double word.
  let singlePrec_t = if is64: asy.i64_t
                     else: asy.i32_t
  let doublePrec_t = if is64: asy.i128_t
                     else: asy.i64_t

  let mulExtTy = function_t(doublePrec_t, [singlePrec_t, singlePrec_t])
  let mulExtKernel = asy.module.addFunction(procName, mulExtTy)
  let blck = asy.ctx.appendBasicBlock(mulExtKernel, "mulExtBody")
  asy.builder.positionAtEnd(blck)

  let bld = asy.builder

  # Widen both operands before multiplying so the full product is kept.
  let a = bld.zext(mulExtKernel.getParam(0), doublePrec_t)
  let b = bld.zext(mulExtKernel.getParam(1), doublePrec_t)
  let r = bld.mul(a, b)

  bld.ret r

  return (mulExtTy, mulExtKernel)
|
||||
|
||||
proc defHi*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Define the "high word" extraction kernel in `asy.module`.
  ##
  ## The generated function is named `hw_hi64` (`wordSize == 64`) or
  ## `hw_hi32` (`wordSize == 32`). It takes one double-word integer,
  ## logically shifts it right by `wordSize` bits and truncates the
  ## result to a single word.
  ##
  ## Returns the function type together with the function value.
  ##
  ## Side effect: `asy.builder` is left positioned at the end of the
  ## basic block that was appended to the new function.
  ##
  ## Only word sizes of 32 and 64 bits are supported. Any other value
  ## used to select the 32-bit types while still shifting by `wordSize`,
  ## silently producing a wrong kernel; it is now rejected.
  doAssert wordSize == 32 or wordSize == 64,
    "defHi: unsupported word size " & $wordSize & ", expected 32 or 64"

  let is64 = wordSize == 64

  let procName = if is64: cstring"hw_hi64"
                 else: cstring"hw_hi32"
  let doublePrec_t = if is64: asy.i128_t
                     else: asy.i64_t
  let singlePrec_t = if is64: asy.i64_t
                     else: asy.i32_t

  let hiTy = function_t(singlePrec_t, [doublePrec_t])

  let hiKernel = asy.module.addFunction(procName, hiTy)
  let blck = asy.ctx.appendBasicBlock(hiKernel, "hiBody")
  asy.builder.positionAtEnd(blck)

  let bld = asy.builder

  # The shift amount must have the same type as the shifted value:
  # %1 = zext i32 64 to i128
  let shift = bld.zext(constInt(asy.i32_t, culonglong wordSize, signExtend = LlvmBool(0)), doublePrec_t)
  # %hiLarge = lshr i128 %input, %1
  let hiLarge = bld.lshr(hiKernel.getParam(0), shift)
  # %hi = trunc i128 %hiLarge to i64
  let hi = bld.trunc(hiLarge, singlePrec_t)

  bld.ret hi

  return (hiTy, hiKernel)
|
||||
|
||||
proc defLo*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Define the "low word" extraction kernel in `asy.module`.
  ##
  ## The generated function is named `hw_lo64` (`wordSize == 64`) or
  ## `hw_lo32` (`wordSize == 32`). It takes one double-word integer and
  ## truncates it to a single word.
  ##
  ## Returns the function type together with the function value.
  ##
  ## Side effect: `asy.builder` is left positioned at the end of the
  ## basic block that was appended to the new function.
  ##
  ## Only word sizes of 32 and 64 bits are supported; any other value
  ## used to silently produce the 32-bit kernel and is now rejected.
  doAssert wordSize == 32 or wordSize == 64,
    "defLo: unsupported word size " & $wordSize & ", expected 32 or 64"

  let is64 = wordSize == 64

  let procName = if is64: cstring"hw_lo64"
                 else: cstring"hw_lo32"
  let doublePrec_t = if is64: asy.i128_t
                     else: asy.i64_t
  let singlePrec_t = if is64: asy.i64_t
                     else: asy.i32_t

  let loTy = function_t(singlePrec_t, [doublePrec_t])

  let loKernel = asy.module.addFunction(procName, loTy)
  let blck = asy.ctx.appendBasicBlock(loKernel, "loBody")
  asy.builder.positionAtEnd(blck)

  let bld = asy.builder

  # %lo = trunc i128 %input to i64
  let lo = bld.trunc(loKernel.getParam(0), singlePrec_t)
  bld.ret lo

  return (loTy, loKernel)
|
|
@ -0,0 +1,209 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
std/[macros, strutils],
|
||||
./llvm
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# x86 Inline ASM
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
macro genInstr(body: untyped): untyped =
  ## Generate one exported builder proc per `op` declaration in `body`.
  ##
  ## Each statement of `body` must have the shape
  ##
  ##   op procName: ("mnemonic", "asm operands", "constraints", [lhs, rhs])
  ##
  ## The generated proc takes a `BuilderRef`, one parameter per input
  ## constraint and a `name` defaulting to "". It wraps the instruction
  ## into an inline-assembly value (AT&T dialect) obtained from
  ## `getInlineAsm` and ends with a `call2` on it; the proc is declared
  ## as returning `ValueRef`.
  ##
  ## The mnemonic is suffixed with "q" when the first operand is 64 bits
  ## wide and with "l" otherwise.
  ##
  ## Only 2-operand instructions whose constraint string starts with an
  ## output constraint ("=") are supported; anything else is reported
  ## through `error` while the macro runs.
  result = newStmtList()

  body.expectKind(nnkStmtList)
  for op in body:
    op.expectKind(nnkCommand)
    doAssert op[0].eqIdent"op"

    let instrName = op[1]
    # For each op, generate a builder proc
    # spec = (mnemonic, asm operand template, constraint string, [operands])
    let spec = op[2][0]
    spec.expectKind(nnkTupleConstr)
    spec[0].expectKind(nnkStrLit)
    spec[1].expectKind(nnkStrLit)
    spec[2].expectKind(nnkStrLit)
    spec[3].expectKind(nnkBracket)

    let instrBody = newStmtList()

    # 1. Detect the size of registers
    let numBits = ident"numBits"
    let regTy = ident"regTy"
    let fnTy = ident"fnTy"
    let ctx = ident"ctx"
    let lhs = spec[3][0]

    instrBody.add quote do:
      let `ctx` = builder.getContext()
      # lhs: ValueRef or uint32 or uint64
      let `numBits` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf().getIntTypeWidth()
                      else: 8*sizeof(`lhs`)
      let `regTy` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf()
                    elif `lhs` is uint32: `ctx`.int32_t()
                    elif `lhs` is uint64: `ctx`.int64_t()
                    else: {.error "Unsupported input type " & $typeof(`lhs`).}

    # 2. Create the LLVM asm signature
    let operands = spec[3]
    let arity = operands.len

    let constraintString = spec[2]

    let instr = spec[0]

    if arity == 2:
      if constraintString.strVal.startsWith('='):
        if constraintString.strVal.endsWith('r'):
          # Last operand lives in a register
          instrBody.add quote do:
            let `fnTy` = function_t(`regTy`, [`regTy`, `regTy`])
        else:
          # Otherwise the last operand is passed as a pointer
          instrBody.add quote do:
            let `fnTy` = function_t(`regTy`, [`regTy`, pointer_t(`regTy`)])
      else:
        # We only support out of place "=" function.
        # In-place with "+" requires alloca + load/stores in codegen
        # in-place functions can be rewritten to be out-place with "matching constraints"
        error "Unsupported constraint: " & constraintString.strVal
    else:
      error "Unsupported arity: " & $arity

    # 3. Nothing, we can use the constraint string as is on x86

    # 4. Register the inline ASM with LLVM
    let inlineASM = ident"inlineASM"
    let instrParam = spec[1]
    let asmString = ident"asmString"

    # `numBits` is spliced in with backticks like every other generated
    # identifier, so it always designates the `let` emitted in step 1.
    instrBody.add quote do:
      let `asmString` = if `numBits` == 64: static(`instr` & "q") & static(" " & `instrParam`)
                        else: static(`instr` & "l") & static(" " & `instrParam`)

    instrBody.add quote do:
      let `inlineASM` = getInlineAsm(
        ty = `fnTy`,
        asmString = `asmString`,
        constraints = `constraintString`,
        # All carry/overflow instructions have side effects on the carry flag and can't be reordered.
        # However, function calls can't be reordered.
        # Relevant operations that affect flags are:
        # - MUL, if the compiler decides not to use MULX
        # - XOR, for zeroing a register
        # NOTE(review): side effects are nevertheless declared as absent here
        # and the constraint strings carry no flags clobber - confirm that
        # the carry chain cannot be broken by the optimizer.
        hasSideEffects = LlvmBool(0),
        isAlignStack = LlvmBool(0),
        dialect = InlineAsmDialectATT,
        canThrow = LlvmBool(0))

    # 5. Call it
    let opArray = nnkBracket.newTree()
    for operand in operands:
      # ValueRef(
      #   when operand is AnyValueRef: operand
      #   else: constInt(regTy, uint64(operand)))
      opArray.add newCall(
        bindSym"ValueRef",
        nnkWhenStmt.newTree(
          nnkElifBranch.newTree(nnkInfix.newTree(ident"is", operand, bindSym"AnyValueRef"), operand),
          nnkElse.newTree(newCall(ident"constInt", regTy, newCall(ident"uint64", operand)))
        )
      )
    # builder.call2(ty, inlineASM, [lhs, rhs], name)
    instrBody.add newCall(
      ident"call2", ident"builder", fnTy,
      inlineASM, opArray, ident"name")

    # 6. Create the function signature
    var opDefs: seq[NimNode]
    opDefs.add ident"ValueRef" # Return type
    opDefs.add newIdentDefs(ident"builder", bindSym"BuilderRef")
    block:
      # `i` indexes `operands`; it only advances on input constraints.
      var i = 0
      for constraint in constraintString.strVal.split(','):
        if constraint.startsWith('=') or constraint.startsWith("~{memory}"):
          # Outputs and memory clobbers are not parameters.
          # Don't increment i
          continue
        elif constraint == "m":
          opDefs.add newIdentDefs(operands[i], ident"ValueRef")
        elif constraint.endsWith('r') or constraint.endsWith('0'):
          # AnyValueRef or uint32 or uint64
          opDefs.add newIdentDefs(
            operands[i],
            nnkInfix.newTree(ident"or",
              nnkInfix.newTree(ident"or", ident"AnyValueRef", ident"uint32"),
              ident"uint64")
          )
        else:
          error "Unsupported constraint: " & constraint
        i += 1
    opDefs.add newIdentDefs(ident"name", bindSym"cstring", newLit"")

    result.add newProc(
      name = nnkPostfix.newTree(ident"*", instrName),
      params = opDefs,
      procType = nnkProcDef,
      body = instrBody)
|
||||
|
||||
# Inline x86 assembly
|
||||
# ------------------------------------------------------------
|
||||
#
|
||||
# We can generate add with carry via
|
||||
# call { i8, i64 } @llvm.x86.addcarry.64(i8 %carryIn, i64 %a, i64 %b)
|
||||
#
|
||||
# We can generate multi-precision mul and mulx via
|
||||
#
|
||||
# define {i64, i64} @mul(i64 %x, i64 %y) #0 {
|
||||
#
|
||||
# %1 = zext i64 %x to i128
|
||||
# %2 = zext i64 %y to i128
|
||||
# %r = mul i128 %1, %2
|
||||
# %3 = zext i32 64 to i128
|
||||
# %4 = lshr i128 %r, %3
|
||||
# %hi = trunc i128 %4 to i64
|
||||
# %lo = trunc i128 %r to i64
|
||||
#
|
||||
# %res_tmp = insertvalue {i64, i64} undef, i64 %hi, 0
|
||||
# %res = insertvalue {i64, i64} %res_tmp, i64 %lo, 1
|
||||
#
|
||||
# ret {i64, i64} %res
|
||||
# }
|
||||
#
|
||||
# attributes #0 = {"target-features"="+bmi2"}
|
||||
#
|
||||
# mul:
|
||||
# mov rax, rdi
|
||||
# mul rsi
|
||||
# mov rcx, rax
|
||||
# mov rax, rdx
|
||||
# mov rdx, rcx
|
||||
# ret
|
||||
#
|
||||
# mul_bmi2:
|
||||
# mov rdx, rdi
|
||||
# mulx rax, rdx, rsi
|
||||
# ret
|
||||
#
|
||||
# Note that mul(hi: var rdx, lo: var rax, a: reg/mem64, b: rax)
|
||||
# - clobbers carry (and many other) flags
|
||||
# - has fixed output to rdx:rax registers
|
||||
# while mulx(hi: var reg64, lo: var reg64, a: reg/mem64, b: rdx)
|
||||
# - does not clobber flags
|
||||
# - has flexible register outputs
|
||||
|
||||
|
||||
genInstr():
  # Each `op` line declares one inline-assembly wrapper as
  #   op procName: (mnemonic, operand template, constraint string, [operand names])
  # and is turned by `genInstr` into an exported builder proc of that name.
  #
  # We are only concerned about the ADCX/ADOX instructions
  # which do not have intrinsics or cannot be generated through instruction combining
  # unlike llvm.x86.addcarry.u64 that can generate adc

  # (cf/of, r) <- a+b+(cf/of)
  # The `_rr` variants constrain `rhs` with "r", the `_rm` variants with "m".
  # NOTE(review): the constraint strings declare no flags clobber although
  # these instructions read and write CF/OF - confirm this is intended.
  op adcx_rr: ("adcx", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adcx_rm: ("adcx", "%2, %0;", "=r,0,m", [lhs, rhs])
  op adox_rr: ("adox", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adox_rm: ("adox", "%2, %0;", "=r,0,m", [lhs, rhs])
|
|
@ -6,7 +6,7 @@
|
|||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import ../../constantine/platforms/gpu/llvm
|
||||
import ../../constantine/platforms/code_generator/llvm
|
||||
|
||||
echo "LLVM JIT compiler Hello World"
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import ../../constantine/platforms/gpu/[llvm, nvidia, bindings/c_abi]
|
||||
import ../../constantine/platforms/code_generator/[llvm, nvidia, bindings/c_abi]
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
|
|
|
@ -11,12 +11,12 @@ import
|
|||
# Standard library
|
||||
std/[unittest, times],
|
||||
# Internal
|
||||
../../constantine/platforms/gpu/[llvm, nvidia, ir],
|
||||
../../constantine/platforms/code_generator/[llvm, nvidia, ir],
|
||||
../../constantine/platforms/static_for,
|
||||
../../constantine/math/config/curves,
|
||||
../../constantine/math/io/io_bigints,
|
||||
../../constantine/math/arithmetic,
|
||||
../../constantine/math_gpu/fields_nvidia,
|
||||
../../constantine/math_codegen/fields_nvidia,
|
||||
# Test utilities
|
||||
../../helpers/prng_unsafe
|
||||
|
||||
|
|
Loading…
Reference in New Issue