* Add CI

* fix windows dlls and nightly->devel (how to pull the nightlies?)

* Nim devel csources are broken due to  /nim/config/nim.cfg(16, 1) Error: invalid command line option: '--hint'

* some benches requires POSIX

* deactivate depth-first search bench on Windows

* rename workflow

* mistake in skipping DFS on windows

* Fix aligned allocation requirements
This commit is contained in:
Mamy Ratsimbazafy 2021-07-01 20:24:50 +02:00 committed by GitHub
parent a1e350094b
commit 39b90fa4a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 354 additions and 15 deletions

181
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,181 @@
name: Taskpools CI
on: [push, pull_request]
jobs:
build:
strategy:
fail-fast: false
max-parallel: 20
matrix:
branch: [version-1-2, version-1-4]
target:
- os: linux
cpu: amd64
TEST_LANG: c
- os: linux
cpu: amd64
TEST_LANG: cpp
- os: linux
cpu: i386
TEST_LANG: c
- os: macos
cpu: amd64
TEST_LANG: c
- os: windows
cpu: amd64
TEST_LANG: c
- os: windows
cpu: amd64
TEST_LANG: cpp
- os: windows
cpu: i386
TEST_LANG: c
include:
- target:
os: linux
builder: ubuntu-18.04
- target:
os: macos
builder: macos-10.15
- target:
os: windows
builder: windows-2019
name: '${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ matrix.target.TEST_LANG }}-${{ matrix.target.BACKEND }} (${{ matrix.branch }})'
runs-on: ${{ matrix.builder }}
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.5.0
with:
access_token: ${{ github.token }}
- name: Checkout taskpools
uses: actions/checkout@v2
with:
path: nim-taskpools
- name: Install dependencies (Linux i386)
if: runner.os == 'Linux' && matrix.target.cpu == 'i386'
run: |
sudo dpkg --add-architecture i386
sudo apt-fast update -qq
sudo DEBIAN_FRONTEND='noninteractive' apt-fast install \
--no-install-recommends -yq gcc-multilib g++-multilib \
libssl-dev:i386
mkdir -p external/bin
cat << EOF > external/bin/gcc
#!/bin/bash
exec $(which gcc) -m32 "\$@"
EOF
cat << EOF > external/bin/g++
#!/bin/bash
exec $(which g++) -m32 "\$@"
EOF
chmod 755 external/bin/gcc external/bin/g++
echo '${{ github.workspace }}/external/bin' >> $GITHUB_PATH
- name: Install dependencies (Windows)
if: runner.os == 'Windows'
shell: bash
run: |
mkdir external
if [[ '${{ matrix.target.cpu }}' == 'amd64' ]]; then
arch=64
else
arch=32
fi
curl -L "https://nim-lang.org/download/mingw$arch.7z" -o "external/mingw$arch.7z"
curl -L "https://nim-lang.org/download/windeps.zip" -o external/windeps.zip
7z x "external/mingw$arch.7z" -oexternal/
7z x external/windeps.zip -oexternal/dlls
echo '${{ github.workspace }}'"/external/mingw$arch/bin" >> $GITHUB_PATH
echo '${{ github.workspace }}'"/external/dlls" >> $GITHUB_PATH
- name: Setup environment
shell: bash
run: echo '${{ github.workspace }}/nim/bin' >> $GITHUB_PATH
- name: Get latest Nim commit hash
id: versions
shell: bash
run: |
getHash() {
git ls-remote "https://github.com/$1" "${2:-HEAD}" | cut -f 1
}
nimHash=$(getHash nim-lang/Nim '${{ matrix.branch }}')
csourcesHash=$(getHash nim-lang/csources)
echo "::set-output name=nim::$nimHash"
echo "::set-output name=csources::$csourcesHash"
- name: Restore prebuilt Nim from cache
id: nim-cache
uses: actions/cache@v1
with:
path: nim
key: 'nim-${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ steps.versions.outputs.nim }}'
- name: Restore prebuilt csources from cache
if: steps.nim-cache.outputs.cache-hit != 'true'
id: csources-cache
uses: actions/cache@v1
with:
path: csources/bin
key: 'csources-${{ matrix.target.os }}-${{ matrix.target.cpu }}-${{ steps.versions.outputs.csources }}'
- name: Checkout Nim csources
if: >
steps.csources-cache.outputs.cache-hit != 'true' &&
steps.nim-cache.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: nim-lang/csources
path: csources
ref: ${{ steps.versions.outputs.csources }}
- name: Checkout Nim
if: steps.nim-cache.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: nim-lang/Nim
path: nim
ref: ${{ steps.versions.outputs.nim }}
- name: Build Nim and associated tools
if: steps.nim-cache.outputs.cache-hit != 'true'
shell: bash
run: |
ncpu=
ext=
case '${{ runner.os }}' in
'Linux')
ncpu=$(nproc)
;;
'macOS')
ncpu=$(sysctl -n hw.ncpu)
;;
'Windows')
ncpu=$NUMBER_OF_PROCESSORS
ext=.exe
;;
esac
[[ -z "$ncpu" || $ncpu -le 0 ]] && ncpu=1
if [[ ! -e csources/bin/nim$ext ]]; then
make -C csources -j $ncpu CC=gcc ucpu='${{ matrix.target.cpu }}'
else
echo 'Using prebuilt csources'
fi
cp -v csources/bin/nim$ext nim/bin
cd nim
nim c koch
./koch boot -d:release
./koch tools -d:release
# clean up to save cache space
rm koch
rm -rf nimcache
rm -rf dist
rm -rf .git
- name: Run taskpools tests
shell: bash
run: |
export UCPU="$cpu"
cd nim-taskpools
nimble test

View File

@ -34,11 +34,13 @@ task test, "Run Taskpools tests":
test "", "examples/e01_simple_tasks.nim"
# Benchmarks
test "", "benchmarks/bouncing_producer_consumer/taskpool_bpc.nim"
test "", "benchmarks/dfs/taskpool_dfs.nim"
test "", "benchmarks/heat/taskpool_heat.nim"
test "", "benchmarks/nqueens/taskpool_nqueens.nim"
test "", "benchmarks/single_task_producer/taskpool_spc.nim"
when not defined(windows):
test "", "benchmarks/single_task_producer/taskpool_spc.nim"
test "", "benchmarks/bouncing_producer_consumer/taskpool_bpc.nim"
# TODO - generics in macro issue
# test "", "benchmarks/matmul_cache_oblivious/taskpool_matmul_co.nim"

View File

@ -6,10 +6,11 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
./channels_spsc_single,
system/ansi_c,
std/os,
./instrumentation/contracts,
std/os
./channels_spsc_single,
./primitives/allocs
{.push gcsafe.}
@ -32,13 +33,13 @@ type
proc newFlowVar*(T: typedesc): Flowvar[T] {.inline.} =
let size = 2 + sizeof(T) # full flag + item size + buffer
result.chan = cast[ptr ChannelSPSCSingle](c_calloc(1, csize_t size))
result.chan = wv_allocAligned(ChannelSPSCSingle, size, alignment = 64)
result.chan[].initialize(sizeof(T))
proc cleanup(fv: Flowvar) {.inline.} =
# TODO: Nim v1.4+ can use "sink Flowvar"
if not fv.chan.isNil:
c_free(fv.chan)
wv_freeAligned(fv.chan)
func isSpawned*(fv: Flowvar): bool {.inline.} =
## Returns true if a flowvar is spawned

View File

@ -0,0 +1,152 @@
# Weave
# Copyright (c) 2019 Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
system/ansi_c
# Helpers
# ----------------------------------------------------------------------------------
proc isPowerOfTwo*(n: int): bool {.inline.} =
(n and (n - 1)) == 0
# TODO: cannot dispatch at compile-time due to https://github.com/nim-lang/Nim/issues/12726
# but all our use-case are for power of 2
func roundNextMultipleOf*(x: Natural, n: Natural): int {.inline.} =
assert n.isPowerOfTwo()
result = (x + n - 1) and not(n - 1)
# func roundNextMultipleOf*(x: Natural, n: static Natural): int {.inline.} =
# ## Round the input to the next multiple of "n"
# when n.isPowerOfTwo():
# # n is a power of 2. (If compiler cannot prove that x>0 it does not make the optim)
# result = (x + n - 1) and not(n - 1)
# else:
# result = ((x + n - 1) div n) * n
# Memory
# ----------------------------------------------------------------------------------
# Nim allocShared, createShared, deallocShared
# take a global lock that is absolutely killing performance
# and shows up either:
# - native_queued_spin_lock_slowpath
# - __pthread_mutex_lock and __pthread_mutex_unlock_usercnt
#
# We use system malloc by default, the flag -d:useMalloc is not enough
template deref*(T: typedesc): typedesc =
## Return the base object type behind a ptr type
typeof(default(T)[])
proc wv_alloc*(T: typedesc): ptr T {.inline.}=
## Default allocator for the Picasso library
## This allocates memory to hold the type T
## and returns a pointer to it
##
## Can use Nim allocator to measure the overhead of its lock
## Memory is not zeroed
when defined(WV_useNimAlloc):
createSharedU(T)
else:
cast[ptr T](c_malloc(csize_t sizeof(T)))
proc wv_allocPtr*(T: typedesc[ptr], zero: static bool = false): T {.inline.}=
## Default allocator for the Picasso library
## This allocates memory to hold the
## underlying type of the pointer type T.
## i.e. if T is ptr int, this allocates an int
##
## Can use Nim allocator to measure the overhead of its lock
## Memory is not zeroed
result = wv_alloc(deref(T))
when zero:
zeroMem(result, sizeof(deref(T)))
proc wv_alloc*(T: typedesc, len: SomeInteger): ptr UncheckedArray[T] {.inline.} =
## Default allocator for the Picasso library.
## This allocates a contiguous chunk of memory
## to hold ``len`` elements of type T
## and returns a pointer to it.
##
## Can use Nim allocator to measure the overhead of its lock
## Memory is not zeroed
when defined(WV_useNimAlloc):
cast[type result](createSharedU(T, len))
else:
cast[type result](c_malloc(csize_t len*sizeof(T)))
proc wv_free*[T: ptr](p: T) {.inline.} =
when defined(WV_useNimAlloc):
freeShared(p)
else:
c_free(p)
when defined(windows):
proc alloca(size: int): pointer {.header: "<malloc.h>".}
else:
proc alloca(size: int): pointer {.header: "<alloca.h>".}
template alloca*(T: typedesc): ptr T =
cast[ptr T](alloca(sizeof(T)))
template alloca*(T: typedesc, len: Natural): ptr UncheckedArray[T] =
cast[ptr UncheckedArray[T]](alloca(sizeof(T) * len))
when defined(windows):
proc aligned_alloc_windows(size, alignment: csize_t): pointer {.sideeffect,importc:"_aligned_malloc", header:"<malloc.h>".}
# Beware of the arg order!
proc wv_freeAligned*[T](p: ptr T){.sideeffect,importc:"_aligned_free", header:"<malloc.h>".}
elif defined(osx):
proc posix_memalign(mem: var pointer, alignment, size: csize_t){.sideeffect,importc, header:"<stdlib.h>".}
proc aligned_alloc(alignment, size: csize_t): pointer {.inline.} =
posix_memalign(result, alignment, size)
proc wv_freeAligned*[T](p: ptr T){.inline.} =
c_free(p)
else:
proc aligned_alloc(alignment, size: csize_t): pointer {.sideeffect,importc, header:"<stdlib.h>".}
proc wv_freeAligned*[T](p: ptr T){.inline.} =
c_free(p)
proc wv_allocAligned*(T: typedesc, alignment: static Natural): ptr T {.inline.} =
## aligned_alloc requires allocating in multiple of the alignment.
static:
assert alignment.isPowerOfTwo()
let # TODO - cannot use a const due to https://github.com/nim-lang/Nim/issues/12726
size = sizeof(T)
requiredMem = size.roundNextMultipleOf(alignment)
when defined(windows):
cast[ptr T](aligned_alloc_windows(csize_t requiredMem, csize_t alignment))
else:
cast[ptr T](aligned_alloc(csize_t alignment, csize_t requiredMem))
proc wv_allocAligned*(T: typedesc, size: int, alignment: static Natural): ptr T {.inline.} =
## aligned_alloc requires allocating in multiple of the alignment.
static:
assert alignment.isPowerOfTwo()
let
requiredMem = size.roundNextMultipleOf(alignment)
when defined(windows):
cast[ptr T](aligned_alloc_windows(csize_t requiredMem, csize_t alignment))
else:
cast[ptr T](aligned_alloc(csize_t alignment, csize_t requiredMem))
proc wv_allocArrayAligned*(T: typedesc, len: int, alignment: static Natural): ptr UncheckedArray[T] {.inline.} =
## aligned_alloc requires allocating in multiple of the alignment.
static:
assert alignment.isPowerOfTwo()
let
size = sizeof(T) * len
requiredMem = size.roundNextMultipleOf(alignment)
when defined(windows):
cast[ptr UncheckedArray[T]](aligned_alloc_windows(csize_t requiredMem, csize_t alignment))
else:
cast[ptr UncheckedArray[T]](aligned_alloc(csize_t alignment, csize_t requiredMem))

View File

@ -43,7 +43,7 @@ import
./channels_spsc_single,
./chase_lev_deques,
./event_notifiers,
./primitives/barriers,
./primitives/[barriers, allocs],
./instrumentation/[contracts, loggers],
./sparsesets,
./flowvars,
@ -98,6 +98,7 @@ type
Taskpool* = ptr object
barrier: SyncBarrier
## Barrier for initialization and teardown
# --- Align: 64
eventNotifier: EventNotifier
## Puts thread to sleep
@ -348,14 +349,16 @@ proc new*(T: type Taskpool, numThreads = countProcessors()): T {.raises: [Except
## Initialize a threadpool that manages `numThreads` threads.
## Default to the number of logical processors available.
var tp = cast[T](c_calloc(1, csize_t sizeof(default(Taskpool)[])))
type TpObj = typeof(default(Taskpool)[])
# Event notifier requires an extra 64 bytes for alignment
var tp = wv_allocAligned(TpObj, sizeof(TpObj) + 64, 64)
tp.barrier.init(numThreads.int32)
tp.eventNotifier.initialize()
tp.numThreads = numThreads
tp.workerDeques = cast[ptr UncheckedArray[ChaseLevDeque[TaskNode]]](c_calloc(csize_t numThreads, csize_t sizeof ChaseLevDeque[TaskNode]))
tp.workers = cast[ptr UncheckedArray[Thread[(Taskpool, WorkerID)]]](c_calloc(csize_t numThreads, csize_t sizeof Thread[(Taskpool, WorkerID)]))
tp.workerSignals = cast[ptr UncheckedArray[Signal]](c_calloc(csize_t numThreads, csize_t sizeof Signal))
tp.workerDeques = wv_allocArrayAligned(ChaseLevDeque[TaskNode], numThreads, alignment = 64)
tp.workers = wv_allocArrayAligned(Thread[(Taskpool, WorkerID)], numThreads, alignment = 64)
tp.workerSignals = wv_allocArrayAligned(Signal, numThreads, alignment = 64)
# Setup master thread
workerContext.id = 0
@ -397,13 +400,13 @@ proc cleanup(tp: var TaskPool) {.raises: [OSError].} =
for i in 1 ..< tp.numThreads:
joinThread(tp.workers[i])
tp.workerSignals.c_free()
tp.workers.c_free()
tp.workerDeques.c_free()
tp.workerSignals.wv_freeAligned()
tp.workers.wv_freeAligned()
tp.workerDeques.wv_freeAligned()
`=destroy`(tp.eventNotifier)
tp.barrier.delete()
tp.c_free()
tp.wv_freeAligned()
proc shutdown*(tp: var TaskPool) {.raises:[Exception].} =
## Wait until all tasks are processed and then shutdown the taskpool