[Research] x86 code generator (#234)

* rename compilers -> intrinsics, math_gpu -> math_codegen

* stash x86 codegen in research
This commit is contained in:
Mamy Ratsimbazafy 2023-04-27 21:52:51 +02:00 committed by GitHub
parent c6d9a213f2
commit 33c3a2e8c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 343 additions and 38 deletions

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms. # at your option. This file may not be copied, modified, or distributed except according to those terms.
import import
../platforms/gpu/[llvm, nvidia, ir] ../platforms/code_generator/[llvm, nvidia, ir]
# ############################################################ # ############################################################
# #

View File

@ -6,7 +6,7 @@
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms. # at your option. This file may not be copied, modified, or distributed except according to those terms.
import ./compilers/bitops import ./intrinsics/bitops
# ############################################################ # ############################################################
# #

View File

@ -24,7 +24,7 @@ type
ctx*: ContextRef ctx*: ContextRef
module*: ModuleRef module*: ModuleRef
builder*: BuilderRef builder*: BuilderRef
i1_t*, i32_t*, i64_t*, void_t*: TypeRef i1_t*, i32_t*, i64_t*, i128_t*, void_t*: TypeRef
backend*: Backend backend*: Backend
Backend* = enum Backend* = enum
@ -54,7 +54,8 @@ proc new*(T: type Assembler_LLVM, backend: Backend, moduleName: cstring): Assemb
result.builder = result.ctx.createBuilder() result.builder = result.ctx.createBuilder()
result.i1_t = result.ctx.int1_t() result.i1_t = result.ctx.int1_t()
result.i32_t = result.ctx.int32_t() result.i32_t = result.ctx.int32_t()
result.i64_t = result.ctx.int32_t() result.i64_t = result.ctx.int64_t()
result.i128_t = result.ctx.int128_t()
result.void_t = result.ctx.void_t() result.void_t = result.ctx.void_t()
result.backend = backend result.backend = backend

View File

@ -143,6 +143,8 @@ macro genInstr(body: untyped): untyped =
# We could have generic constraint string generation, but we only have 2 arities to support # We could have generic constraint string generation, but we only have 2 arities to support
# and codegen without quote do would be even more verbose and hard to read. # and codegen without quote do would be even more verbose and hard to read.
# TODO: commutative inputs
if arity == 2: if arity == 2:
let op0 = operands[0] let op0 = operands[0]
let op1 = operands[1] let op1 = operands[1]
@ -217,8 +219,7 @@ macro genInstr(body: untyped): untyped =
hasSideEffects = LlvmBool(0), hasSideEffects = LlvmBool(0),
isAlignStack = LlvmBool(0), isAlignStack = LlvmBool(0),
dialect = InlineAsmDialectATT, dialect = InlineAsmDialectATT,
canThrow = LlvmBool(0) canThrow = LlvmBool(0))
)
# 5. Call it # 5. Call it
let opArray = nnkBracket.newTree() let opArray = nnkBracket.newTree()
@ -235,8 +236,7 @@ macro genInstr(body: untyped): untyped =
# builder.call2(ty, inlineASM, [lhs, rhs], name) # builder.call2(ty, inlineASM, [lhs, rhs], name)
instrBody.add newCall( instrBody.add newCall(
ident"call2", ident"builder", fnTy, ident"call2", ident"builder", fnTy,
inlineASM, opArray, ident"name" inlineASM, opArray, ident"name")
)
# 6. Create the function signature # 6. Create the function signature
var opDefs: seq[NimNode] var opDefs: seq[NimNode]
@ -273,8 +273,7 @@ macro genInstr(body: untyped): untyped =
name = nnkPostfix.newTree(ident"*", instrName), name = nnkPostfix.newTree(ident"*", instrName),
params = opDefs, params = opDefs,
procType = nnkProcDef, procType = nnkProcDef,
body = instrBody body = instrBody)
)
# Inline PTX assembly # Inline PTX assembly
# ------------------------------------------------------------ # ------------------------------------------------------------

View File

@ -13,7 +13,7 @@ import
multiplexers, multiplexers,
ct_division ct_division
], ],
compilers/[ intrinsics/[
addcarry_subborrow, addcarry_subborrow,
extended_precision, extended_precision,
compiler_optim_hints compiler_optim_hints

1
research/codegen/nim.cfg Normal file
View File

@ -0,0 +1 @@
--path:../../constantine/platforms/code_generator

95
research/codegen/x86.nim Normal file
View File

@ -0,0 +1,95 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
./bindings/c_abi,
./llvm, ./ir,
./x86_inlineasm,
../primitives
export x86_inlineasm
# ############################################################
#
# x86 API
#
# ############################################################
proc defMulExt*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Adds an extended-precision multiplication function to `asy.module`
  ## and returns its (function type, function value) pair.
  ##
  ## With `wordSize == 64` the function is named `hw_mulExt64` and has
  ## signature (i64, i64) -> i128. Any other `wordSize` selects
  ## `hw_mulExt32` with signature (i32, i32) -> i64.
  ##
  ## Both parameters are zero-extended to the double-width type and then
  ## multiplied.
  ##
  ## The builder of `asy` is left positioned at the end of the new
  ## "mulExtBody" basic block.
  let is64 = wordSize == 64
  let word_t = if is64: asy.i64_t else: asy.i32_t
  let wide_t = if is64: asy.i128_t else: asy.i64_t
  let fnName = if is64: cstring"hw_mulExt64" else: cstring"hw_mulExt32"

  let fnTy = function_t(wide_t, [word_t, word_t])
  let fn = asy.module.addFunction(fnName, fnTy)

  let irBuilder = asy.builder
  irBuilder.positionAtEnd(asy.ctx.appendBasicBlock(fn, "mulExtBody"))

  # Widen both operands before multiplying so the result type is the
  # double-width integer.
  let lhsWide = irBuilder.zext(fn.getParam(0), wide_t)
  let rhsWide = irBuilder.zext(fn.getParam(1), wide_t)
  let product = irBuilder.mul(lhsWide, rhsWide)
  irBuilder.ret product

  (fnTy, fn)
proc defHi*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Adds a function to `asy.module` that extracts the upper word of a
  ## double-width integer, and returns its (function type, function value)
  ## pair.
  ##
  ## With `wordSize == 64` the function is named `hw_hi64` and has
  ## signature (i128) -> i64. Any other `wordSize` selects `hw_hi32`
  ## with signature (i64) -> i32.
  ##
  ## The input is logically shifted right by `wordSize` bits and then
  ## truncated to the single-width type.
  ##
  ## The builder of `asy` is left positioned at the end of the new
  ## "hiBody" basic block.
  let is64 = wordSize == 64
  let word_t = if is64: asy.i64_t else: asy.i32_t
  let wide_t = if is64: asy.i128_t else: asy.i64_t
  let fnName = if is64: cstring"hw_hi64" else: cstring"hw_hi32"

  let fnTy = function_t(word_t, [wide_t])
  let fn = asy.module.addFunction(fnName, fnTy)

  let irBuilder = asy.builder
  irBuilder.positionAtEnd(asy.ctx.appendBasicBlock(fn, "hiBody"))

  # %1 = zext i32 64 to i128
  let shiftAmount = irBuilder.zext(
    constInt(asy.i32_t, culonglong(wordSize), signExtend = LlvmBool(0)),
    wide_t)
  # %hiLarge = lshr i128 %input, %1
  let shifted = irBuilder.lshr(fn.getParam(0), shiftAmount)
  # %hi = trunc i128 %hiLarge to i64
  let upperWord = irBuilder.trunc(shifted, word_t)
  irBuilder.ret upperWord

  (fnTy, fn)
proc defLo*(asy: Assembler_LLVM, wordSize: int): FnDef =
  ## Adds a function to `asy.module` that extracts the lower word of a
  ## double-width integer, and returns its (function type, function value)
  ## pair.
  ##
  ## With `wordSize == 64` the function is named `hw_lo64` and has
  ## signature (i128) -> i64. Any other `wordSize` selects `hw_lo32`
  ## with signature (i64) -> i32.
  ##
  ## The builder of `asy` is left positioned at the end of the new
  ## "loBody" basic block.
  let is64 = wordSize == 64
  let word_t = if is64: asy.i64_t else: asy.i32_t
  let wide_t = if is64: asy.i128_t else: asy.i64_t
  let fnName = if is64: cstring"hw_lo64" else: cstring"hw_lo32"

  let fnTy = function_t(word_t, [wide_t])
  let fn = asy.module.addFunction(fnName, fnTy)

  let irBuilder = asy.builder
  irBuilder.positionAtEnd(asy.ctx.appendBasicBlock(fn, "loBody"))

  # %lo = trunc i128 %input to i64
  let lowerWord = irBuilder.trunc(fn.getParam(0), word_t)
  irBuilder.ret lowerWord

  (fnTy, fn)

View File

@ -0,0 +1,209 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[macros, strutils],
./llvm
# ############################################################
#
# x86 Inline ASM
#
# ############################################################
macro genInstr(body: untyped): untyped =
  ## Generates one exported LLVM builder proc per `op` entry of `body`.
  ##
  ## Each entry must have the shape
  ##
  ##   op procName: ("mnemonic", "operand template", "constraints", [operands])
  ##
  ## and expands to
  ##
  ##   proc procName*(builder: BuilderRef, <operands>, name: cstring = ""): ValueRef
  ##
  ## The generated body registers the instruction as AT&T-dialect inline
  ## assembly through `getInlineAsm` and invokes it with `call2`.
  ##
  ## The mnemonic receives a "q" suffix when the first operand is 64 bits
  ## wide and an "l" suffix otherwise.
  ##
  ## Restrictions, all enforced at compile time:
  ## - exactly 2 operands per instruction
  ## - the constraint string must start with "=" (out-of-place output)
  ## - input constraints must be "m", or end with 'r' or '0'
  result = newStmtList()

  body.expectKind(nnkStmtList)
  for op in body:
    op.expectKind(nnkCommand)
    doAssert op[0].eqIdent"op"

    let instrName = op[1]
    # For each op, generate a builder proc
    op[2][0].expectKind(nnkTupleConstr)
    op[2][0][0].expectKind(nnkStrLit)
    op[2][0][1].expectKind(nnkStrLit)
    op[2][0][2].expectKind(nnkStrLit)
    op[2][0][3].expectKind(nnkBracket)

    let instrBody = newStmtList()

    # 1. Detect the size of registers
    let numBits = ident"numBits"
    let regTy = ident"regTy"
    let fnTy = ident"fnTy"
    let ctx = ident"ctx"
    let lhs = op[2][0][3][0]

    instrBody.add quote do:
      let `ctx` = builder.getContext()
      # lhs: ValueRef or uint32 or uint64
      let `numBits` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf().getIntTypeWidth()
                      else: 8*sizeof(`lhs`)
      let `regTy` = when `lhs` is ValueRef|ConstValueRef: `lhs`.getTypeOf()
                    elif `lhs` is uint32: `ctx`.int32_t()
                    elif `lhs` is uint64: `ctx`.int64_t()
                    else: {.error: "Unsupported input type " & $typeof(`lhs`).}

    # 2. Create the LLVM asm signature
    let operands = op[2][0][3]
    let arity = operands.len

    let constraintString = op[2][0][2]
    let instr = op[2][0][0]

    if arity == 2:
      if constraintString.strVal.startsWith('='):
        if constraintString.strVal.endsWith('r'):
          # Last input lives in a register: both inputs share the register type.
          instrBody.add quote do:
            let `fnTy` = function_t(`regTy`, [`regTy`, `regTy`])
        else:
          # Otherwise the last input is passed as a pointer to the register type.
          instrBody.add quote do:
            let `fnTy` = function_t(`regTy`, [`regTy`, pointer_t(`regTy`)])
      else:
        # We only support out of place "=" function.
        # In-place with "+" requires alloca + load/stores in codegen
        # in-place functions can be rewritten to be out-place with "matching constraints"
        error "Unsupported constraint: " & constraintString.strVal
    else:
      error "Unsupported arity: " & $arity

    # 3. Nothing, we can use the constraint string as is on x86

    # 4. Register the inline ASM with LLVM
    let inlineASM = ident"inlineASM"
    let instrParam = op[2][0][1]
    let asmString = ident"asmString"

    instrBody.add quote do:
      let `asmString` = if `numBits` == 64: static(`instr` & "q") & static(" " & `instrParam`)
                        else: static(`instr` & "l") & static(" " & `instrParam`)

    instrBody.add quote do:
      let `inlineASM` = getInlineAsm(
        ty = `fnTy`,
        asmString = `asmString`,
        constraints = `constraintString`,
        # All carry/overflow instructions have side effects on the carry flag and can't be reordered
        # However, function calls can't be reordered.
        # Relevant operations that affects flags are:
        # - MUL, if the compiler decides not to use MULX
        # - XOR, for zeroing a register
        # NOTE(review): hasSideEffects stays 0 despite the flag dependency described above
        #               - confirm this is intended.
        hasSideEffects = LlvmBool(0),
        isAlignStack = LlvmBool(0),
        dialect = InlineAsmDialectATT,
        canThrow = LlvmBool(0))

    # 5. Call it
    let opArray = nnkBracket.newTree()
    for operand in operands:
      # when operand is ValueRef: operand
      # else: constInt(uint64(operand))
      opArray.add newCall(
        bindSym"ValueRef",
        nnkWhenStmt.newTree(
          nnkElifBranch.newTree(nnkInfix.newTree(ident"is", operand, bindSym"AnyValueRef"), operand),
          nnkElse.newTree(newCall(ident"constInt", regTy, newCall(ident"uint64", operand)))
        )
      )
    # builder.call2(ty, inlineASM, [lhs, rhs], name)
    instrBody.add newCall(
      ident"call2", ident"builder", fnTy,
      inlineASM, opArray, ident"name")

    # 6. Create the function signature
    var opDefs: seq[NimNode]
    opDefs.add ident"ValueRef" # Return type
    opDefs.add newIdentDefs(ident"builder", bindSym"BuilderRef")
    block:
      # `i` indexes the input operands; output and clobber constraints
      # do not correspond to a proc parameter.
      var i = 0
      for constraint in constraintString.strVal.split(','):
        if constraint.startsWith('=') or constraint.startsWith("~{memory}"):
          # Don't increment i
          continue
        elif constraint == "m":
          opDefs.add newIdentDefs(operands[i], ident"ValueRef")
        elif constraint.endsWith('r') or constraint.endsWith('0'):
          opDefs.add newIdentDefs(
            operands[i],
            nnkInfix.newTree(ident"or",
              nnkInfix.newTree(ident"or", ident"AnyValueRef", ident"uint32"),
              ident"uint64")
          )
        else:
          error "Unsupported constraint: " & constraint
        i += 1
    opDefs.add newIdentDefs(ident"name", bindSym"cstring", newLit"")

    result.add newProc(
      name = nnkPostfix.newTree(ident"*", instrName),
      params = opDefs,
      procType = nnkProcDef,
      body = instrBody)
# Inline x86 assembly
# ------------------------------------------------------------
#
# We can generate add with carry via
# call { i8, i64 } @llvm.x86.addcarry.64(i8 %carryIn, i64 %a, i64 %b)
#
# We can generate multi-precision mul and mulx via
#
# define {i64, i64} @mul(i64 %x, i64 %y) #0 {
#
# %1 = zext i64 %x to i128
# %2 = zext i64 %y to i128
# %r = mul i128 %1, %2
# %3 = zext i32 64 to i128
# %4 = lshr i128 %r, %3
# %hi = trunc i128 %4 to i64
# %lo = trunc i128 %r to i64
#
# %res_tmp = insertvalue {i64, i64} undef, i64 %hi, 0
# %res = insertvalue {i64, i64} %res_tmp, i64 %lo, 1
#
# ret {i64, i64} %res
# }
#
# attributes #0 = {"target-features"="+bmi2"}
#
# mul:
# mov rax, rdi
# mul rsi
# mov rcx, rax
# mov rax, rdx
# mov rdx, rcx
# ret
#
# mul_bmi2:
# mov rdx, rdi
# mulx rax, rdx, rsi
# ret
#
# Note that mul(hi: var rdx, lo: var rax, a: reg/mem64, b: rax)
# - clobbers carry (and many other) flags
# - has fixed output to rdx:rax registers
# while mulx(hi: var reg64, lo: var reg64, a: reg/mem64, b: rdx)
# - does not clobber flags
# - has flexible register outputs
genInstr():
  # Each entry has the shape
  #   op <proc name>: (<mnemonic>, <AT&T operand template>, <LLVM constraint string>, [<operand names>])
  # genInstr turns every entry into an exported builder proc and appends
  # the "q" (64-bit) or "l" size suffix to the mnemonic.
  #
  # `_rr` variants use the "r" constraint for the last input,
  # `_rm` variants use the "m" constraint, in which case the generated proc
  # takes that operand as a ValueRef.
  #
  # We are only concerned about the ADCX/ADOX instructions
  # which do not have intrinsics or cannot be generated through instruction combining
  # unlike llvm.x86.addcarry.64 that can generate adc
  # (cf/of, r) <- a+b+(cf/of)
  op adcx_rr: ("adcx", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adcx_rm: ("adcx", "%2, %0;", "=r,0,m", [lhs, rhs])
  op adox_rr: ("adox", "%2, %0;", "=r,%0,r", [lhs, rhs])
  op adox_rm: ("adox", "%2, %0;", "=r,0,m", [lhs, rhs])

View File

@ -6,7 +6,7 @@
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms. # at your option. This file may not be copied, modified, or distributed except according to those terms.
import ../../constantine/platforms/gpu/llvm import ../../constantine/platforms/code_generator/llvm
echo "LLVM JIT compiler Hello World" echo "LLVM JIT compiler Hello World"

View File

@ -6,7 +6,7 @@
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms. # at your option. This file may not be copied, modified, or distributed except according to those terms.
import ../../constantine/platforms/gpu/[llvm, nvidia, bindings/c_abi] import ../../constantine/platforms/code_generator/[llvm, nvidia, bindings/c_abi]
# ############################################################ # ############################################################
# #

View File

@ -11,12 +11,12 @@ import
# Standard library # Standard library
std/[unittest, times], std/[unittest, times],
# Internal # Internal
../../constantine/platforms/gpu/[llvm, nvidia, ir], ../../constantine/platforms/code_generator/[llvm, nvidia, ir],
../../constantine/platforms/static_for, ../../constantine/platforms/static_for,
../../constantine/math/config/curves, ../../constantine/math/config/curves,
../../constantine/math/io/io_bigints, ../../constantine/math/io/io_bigints,
../../constantine/math/arithmetic, ../../constantine/math/arithmetic,
../../constantine/math_gpu/fields_nvidia, ../../constantine/math_codegen/fields_nvidia,
# Test utilities # Test utilities
../../helpers/prng_unsafe ../../helpers/prng_unsafe