mirror of
https://github.com/status-im/nim-taskpools.git
synced 2025-02-19 17:34:17 +00:00
301 lines
7.6 KiB
Nim
301 lines
7.6 KiB
Nim
# Weave
|
|
# Copyright (c) 2019 Mamy André-Ratsimbazafy
|
|
# Licensed and distributed under either of
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
# From fibril
|
|
#
|
|
# Original license
|
|
#
|
|
# /*
|
|
# * Heat diffusion (Jacobi-type iteration)
|
|
# *
|
|
# * Volker Strumpen, Boston August 1996
|
|
# *
|
|
# * Copyright (c) 1996 Massachusetts Institute of Technology
|
|
# *
|
|
# * This program is free software; you can redistribute it and/or modify
|
|
# * it under the terms of the GNU General Public License as published by
|
|
# * the Free Software Foundation; either version 2 of the License, or
|
|
# * (at your option) any later version.
|
|
# *
|
|
# * This program is distributed in the hope that it will be useful,
|
|
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# * GNU General Public License for more details.
|
|
# *
|
|
# * You should have received a copy of the GNU General Public License
|
|
# * along with this program; if not, write to the Free Software
|
|
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
# */
|
|
|
|
import
|
|
# Stdlib
|
|
strformat, os, strutils, math, system/ansi_c,
|
|
cpuinfo, threadpool,
|
|
# bench
|
|
../wtime, ../resources
|
|
|
|
# This deadlocks :/
|
|
|
|
# Helpers
|
|
# -------------------------------------------------------
|
|
|
|
# We need a thin wrapper around raw pointers for matrices,
|
|
# we can't pass "var seq[seq[float64]]" to other threads
|
|
# nor "var" for that matter
|
|
type
|
|
Matrix[T] = object
|
|
buffer: ptr UncheckedArray[T]
|
|
m, n: int
|
|
|
|
Row[T] = object
|
|
buffer: ptr UncheckedArray[T]
|
|
len: int
|
|
|
|
func newMatrix[T](m, n: int): Matrix[T] {.inline.} =
|
|
result.buffer = cast[ptr UncheckedArray[T]](c_malloc(csize_t m*n*sizeof(T)))
|
|
result.m = m
|
|
result.n = n
|
|
|
|
template `[]`[T](mat: Matrix[T], row, col: Natural): T =
|
|
# row-major storage
|
|
assert row < mat.m
|
|
assert col < mat.n
|
|
mat.buffer[row * mat.n + col]
|
|
|
|
template `[]=`[T](mat: Matrix[T], row, col: Natural, value: T) =
|
|
assert row < mat.m
|
|
assert col < mat.n
|
|
mat.buffer[row * mat.n + col] = value
|
|
|
|
func getRow[T](mat: Matrix[T], rowIdx: Natural): Row[T] {.inline.} =
|
|
# row-major storage, there are n columns in between each rows
|
|
assert rowIdx < mat.m
|
|
result.buffer = cast[ptr UncheckedArray[T]](mat.buffer[rowIdx * mat.n].addr)
|
|
result.len = mat.m
|
|
|
|
template `[]`[T](row: Row[T], idx: Natural): T =
|
|
assert idx < row.len
|
|
row.buffer[idx]
|
|
|
|
template `[]=`[T](row: Row[T], idx: Natural, value: T) =
|
|
assert idx < row.len
|
|
row.buffer[idx] = value
|
|
|
|
func delete[T](mat: sink Matrix[T]) =
|
|
c_free(mat.buffer)
|
|
|
|
# And an auto converter for int32 -> float64 so we don't have to convert
|
|
# all i, j indices manually
|
|
|
|
converter i32toF64(x: int32): float64 {.inline.} =
|
|
float64(x)
|
|
|
|
# -------------------------------------------------------
|
|
|
|
template f(x, y: SomeFloat): SomeFloat =
|
|
sin(x) * sin(y)
|
|
|
|
template randa[T: SomeFloat](x, t: T): T =
|
|
T(0.0)
|
|
|
|
proc randb(x, t: SomeFloat): SomeFloat {.inline.} =
|
|
# proc instead of template to avoid Nim constant folding bug:
|
|
# https://github.com/nim-lang/Nim/issues/12783
|
|
exp(-2 * t) * sin(x)
|
|
|
|
template randc[T: SomeFloat](y, t: T): T =
|
|
T(0.0)
|
|
|
|
proc randd(y, t: SomeFloat): SomeFloat {.inline.} =
|
|
# proc instead of template to avoid Nim constant folding bug:
|
|
# https://github.com/nim-lang/Nim/issues/12783
|
|
exp(-2 * t) * sin(y)
|
|
|
|
template solu(x, y, t: SomeFloat): SomeFloat =
|
|
exp(-2 * t) * sin(x) * sin(y)
|
|
|
|
const n = 4096'i32
|
|
|
|
var
|
|
nx, ny, nt: int32
|
|
xu, xo, yu, yo, tu, to: float64
|
|
|
|
dx, dy, dt: float64
|
|
dtdxsq, dtdysq: float64
|
|
|
|
odd: Matrix[float64]
|
|
even: Matrix[float64]
|
|
|
|
proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable.}=
|
|
# TODO to allow awaiting `heat` we return a dummy bool
|
|
# The parallel spawns are updating the same matrix cells otherwise
|
|
if iu - il > 1:
|
|
let im = (il + iu) div 2
|
|
|
|
let h = spawn heat(m, il, im)
|
|
heat(m, im, iu)
|
|
discard ^h
|
|
return true
|
|
# ------------------------
|
|
|
|
let i = il
|
|
let row = m.getRow(i)
|
|
|
|
if i == 0:
|
|
for j in 0 ..< ny:
|
|
row[j] = randc(yu + j*dy, 0)
|
|
elif i == nx - 1:
|
|
for j in 0 ..< ny:
|
|
row[j] = randd(yu + j*dy, 0)
|
|
else:
|
|
row[0] = randa(xu + i*dx, 0)
|
|
for j in 1 ..< ny - 1:
|
|
row[j] = f(xu + i*dx, yu + j*dy)
|
|
row[ny - 1] = randb(xu + i*dx, 0)
|
|
|
|
proc diffuse(output: Matrix[float64], input: Matrix[float64], il, iu: int32, t: float64): bool {.discardable.} =
|
|
# TODO to allow awaiting `diffuse` we return a dummy bool
|
|
# The parallel spawns are updating the same matrix cells otherwise
|
|
if iu - il > 1:
|
|
let im = (il + iu) div 2
|
|
|
|
let d = spawn diffuse(output, input, il, im, t)
|
|
diffuse(output, input, im, iu, t)
|
|
discard ^d
|
|
return true
|
|
# ------------------------
|
|
|
|
let i = il
|
|
let row = output.getRow(i)
|
|
|
|
if i == 0:
|
|
for j in 0 ..< ny:
|
|
row[j] = randc(yu + j*dy, t)
|
|
elif i == nx - 1:
|
|
for j in 0 ..< ny:
|
|
row[j] = randd(yu + j*dy, t)
|
|
else:
|
|
row[0] = randa(xu + i*dx, t)
|
|
for j in 1 ..< ny - 1:
|
|
row[j] = input[i, j] + # The use of nested sequences here is a bad idea ...
|
|
dtdysq * (input[i, j+1] - 2 * input[i, j] + input[i, j-1]) +
|
|
dtdxsq * (input[i+1, j] - 2 * input[i, j] + input[i-1, j])
|
|
row[ny - 1] = randb(xu + i*dx, t)
|
|
|
|
proc initTest() =
|
|
nx = n
|
|
ny = 1024
|
|
nt = 100
|
|
xu = 0.0
|
|
xo = 1.570796326794896558
|
|
yu = 0.0
|
|
yo = 1.570796326794896558
|
|
tu = 0.0
|
|
to = 0.0000001
|
|
|
|
dx = (xo - xu) / float64(nx - 1)
|
|
dy = (yo - yu) / float64(ny - 1)
|
|
dt = (to - tu) / float64(nt)
|
|
|
|
dtdxsq = dt / (dx * dx)
|
|
dtdysq = dt / (dy * dy)
|
|
|
|
even = newMatrix[float64](nx, ny)
|
|
odd = newMatrix[float64](nx, ny)
|
|
|
|
proc prep() =
|
|
heat(even, 0, nx)
|
|
|
|
proc test() =
|
|
var t = tu
|
|
|
|
for _ in countup(1, nt.int, 2):
|
|
# nt included
|
|
t += dt
|
|
diffuse(odd, even, 0, nx, t)
|
|
t += dt
|
|
diffuse(even, odd, 0, nx, t)
|
|
|
|
if nt mod 2 != 0:
|
|
t += dt
|
|
diffuse(odd, even, 0, nx, t)
|
|
|
|
proc verify() =
|
|
var
|
|
mat: Matrix[float64]
|
|
mae: float64
|
|
mre: float64
|
|
me: float64
|
|
|
|
mat = if nt mod 2 != 0: odd else: even
|
|
|
|
for a in 0 ..< nx:
|
|
for b in 0 ..< ny:
|
|
var tmp = abs(mat[a, b] - solu(xu + a*dx, yu + b*dy, to))
|
|
if tmp > 1e-3:
|
|
echo "nx: ", nx, " - ny: ", ny
|
|
echo "mat[", a, ", ", b, "] = ", mat[a, b], ", expected sol = ", solu(xu + a*dx, yu + b*dy, to)
|
|
quit 1
|
|
|
|
me += tmp
|
|
if tmp > mae: mae = tmp
|
|
if mat[a, b] != 0.0: tmp /= mat[a, b]
|
|
if tmp > mre: mre = tmp
|
|
|
|
me /= nx * ny
|
|
|
|
if mae > 1e-12:
|
|
echo &"Local maximal absolute error {mae:1.3e}"
|
|
quit 1
|
|
if mre > 1e-12:
|
|
echo &"Local maximal relative error {mre:1.3e}"
|
|
quit 1
|
|
if me > 1e-12:
|
|
echo &"Global mean absolute error {me:1.3e}"
|
|
quit 1
|
|
|
|
echo "Verification successful"
|
|
|
|
proc main() =
|
|
var nthreads: int
|
|
nthreads = countProcessors()
|
|
|
|
var ru: Rusage
|
|
getrusage(RusageSelf, ru)
|
|
var
|
|
rss = ru.ru_maxrss
|
|
flt = ru.ru_minflt
|
|
|
|
initTest()
|
|
|
|
prep()
|
|
let start = wtime_usec()
|
|
test()
|
|
let stop = wtime_usec()
|
|
|
|
getrusage(RusageSelf, ru)
|
|
rss = ru.ru_maxrss - rss
|
|
flt = ru.ru_minflt - flt
|
|
|
|
sync()
|
|
|
|
verify()
|
|
delete(even)
|
|
delete(odd)
|
|
|
|
echo "Scheduler: Nim threadpool (standard lib)"
|
|
echo "Benchmark: heat"
|
|
echo "Threads: ", nthreads
|
|
echo "Time(us) ", stop - start
|
|
echo "Max RSS (KB): ", ru.ru_maxrss
|
|
echo "Runtime RSS (KB): ", rss
|
|
echo "# of page faults: ", flt
|
|
|
|
quit 0
|
|
|
|
main()
|