From 5c306bc4d08b5a30d9c65d007435c8a988527d87 Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Fri, 4 Mar 2022 21:01:39 -0600 Subject: [PATCH] wip: binding to leopard --- dagger/leopard.nim | 215 ++++++++++++++++++++++++--- tests/dagger/testleopard.nim | 277 ++++++++++++++++++----------------- tests/testAll.nim | 12 +- 3 files changed, 345 insertions(+), 159 deletions(-) diff --git a/dagger/leopard.nim b/dagger/leopard.nim index 3bae67b5..6cac9849 100644 --- a/dagger/leopard.nim +++ b/dagger/leopard.nim @@ -1,33 +1,204 @@ -type - LeopardResult* {.pure.} = enum - Leopard_CallInitialize = -7.cint - Leopard_Platform = -6.cint - Leopard_InvalidInput = -5.cint - Leopard_InvalidCounts = -4.cint - Leopard_InvalidSize = -3.cint - Leopard_TooMuchData = -2.cint - Leopard_NeedMoreData = -1.cint - Leopard_Success = 0.cint +## +## Copyright (c) 2017 Christopher A. Taylor. All rights reserved. +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimer. +## Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## Neither the name of Leopard-RS nor the names of its contributors may be +## used to endorse or promote products derived from this software without +## specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +## ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +## POSSIBILITY OF SUCH DAMAGE. +## + +## +## Leopard-RS +## MDS Reed-Solomon Erasure Correction Codes for Large Data in C +## +## Algorithms are described in LeopardCommon.h +## +## +## Inspired by discussion with: +## +## Sian-Jheng Lin : Author of {1} {3}, basis for Leopard +## Bulat Ziganshin : Author of FastECC +## Yutaka Sawada : Author of MultiPar +## +## +## References: +## +## {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung, +## "Novel Polynomial Basis with Fast Fourier Transform +## and Its Application to Reed-Solomon Erasure Codes" +## IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016. +## +## {2} D. G. Cantor, "On arithmetical algorithms over finite fields", +## Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989. +## +## {3} Sian-Jheng Lin, Wei-Ho Chung, "An Efficient (n, k) Information +## Dispersal Algorithm for High Code Rate System over Fermat Fields," +## IEEE Commun. Lett., vol.16, no.12, pp. 2036-2039, Dec. 2012. +## +## {4} Plank, J. S., Greenan, K. M., Miller, E. L., "Screaming fast Galois Field +## arithmetic using Intel SIMD instructions." In: FAST-2013: 11th Usenix +## Conference on File and Storage Technologies, San Jose, 2013 +## + +## ------------------------------------------------------------------------------ +## Initialization API +## +## leo_init() +## +## Perform static initialization for the library, verifying that the platform +## is supported.
+## +## Returns 0 on success and other values on failure. +## const header = "leopard.h" {.pragma: leo, cdecl, header: header.} -proc leo_init*(): cint {.leo, importCpp.} +proc leoInit*(): cint {.leo, importcpp: "leo_init".} -func leo_result_string*(res: LeopardResult): cstring {.leo, importc.} +## ------------------------------------------------------------------------------ +## Shared Constants / Datatypes +## Results -func leo_encode_work_count*(original_count, recovery_count: cuint): cuint - {.leo, importc.} +type + LeopardResult* = enum + LeopardCallInitialize = -7, ## Call leo_init() first + LeopardPlatform = -6, ## Platform is unsupported + LeopardInvalidInput = -5, ## A function parameter was invalid + LeopardInvalidCounts = -4, ## Invalid counts provided + LeopardInvalidSize = -3, ## Buffer size must be a multiple of 64 bytes + LeopardTooMuchData = -2, ## Buffer counts are too high + LeopardNeedMoreData = -1, ## Not enough recovery data received + LeopardSuccess = 0 ## Operation succeeded -proc leo_encode*(buffer_bytes: uint64, original_count, recovery_count, - work_count: cuint, original_data, work_data: pointer): LeopardResult - {.leo, importc.} -func leo_decode_work_count*(original_count, recovery_count: cuint): cuint - {.leo, importc.} +## Convert Leopard result to string -proc leo_decode*(buffer_bytes: uint64, original_count, recovery_count, - work_count: cuint, original_data, recovery_data, work_data: pointer): - LeopardResult {.leo, importc.} +proc leoResultString*(result: LeopardResult): cstring {.leo, importc: "leo_result_string".} +## ------------------------------------------------------------------------------ +## Encoder API +## +## leo_encode_work_count() +## +## Calculate the number of work_data buffers to provide to leo_encode(). +## +## The sum of original_count + recovery_count must not exceed 65536. +## +## Returns the work_count value to pass into leo_encode(). +## Returns 0 on invalid input. 
+## + +proc leoEncodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint {. + leo, importc: "leo_encode_work_count".} +## +## leo_encode() +## +## Generate recovery data. +## +## original_count: Number of original_data[] buffers provided. +## recovery_count: Number of desired recovery data buffers. +## buffer_bytes: Number of bytes in each data buffer. +## original_data: Array of pointers to original data buffers. +## work_count: Number of work_data[] buffers, from leo_encode_work_count(). +## work_data: Array of pointers to work data buffers. +## +## The sum of original_count + recovery_count must not exceed 65536. +## The recovery_count <= original_count. +## +## The buffer_bytes must be a multiple of 64. +## Each buffer should have the same number of bytes. +## Even the last piece must be rounded up to the block size. +## +## Let buffer_bytes = The number of bytes in each buffer: +## +## original_count = static_cast<unsigned>( +## ((uint64_t)total_bytes + buffer_bytes - 1) / buffer_bytes); +## +## Or if the number of pieces is known: +## +## buffer_bytes = static_cast<unsigned>( +## ((uint64_t)total_bytes + original_count - 1) / original_count); +## +## Returns Leopard_Success on success. +## The first set of recovery_count buffers in work_data will be the result. +## Returns other values on errors. +## + +proc leoEncode*(bufferBytes: uint64; originalCount: cuint; recoveryCount: cuint; + workCount: cuint; originalData: ptr pointer; workData: ptr pointer): LeopardResult {.
+ leo, importc: "leo_encode".} + ## Number of bytes in each data buffer + ## Number of original_data[] buffer pointers + ## Number of recovery_data[] buffer pointers + ## Number of work_data[] buffer pointers, from leo_encode_work_count() + ## Array of pointers to original data buffers +## Array of work buffers +## ------------------------------------------------------------------------------ +## Decoder API +## +## leo_decode_work_count() +## +## Calculate the number of work_data buffers to provide to leo_decode(). +## +## The sum of original_count + recovery_count must not exceed 65536. +## +## Returns the work_count value to pass into leo_decode(). +## Returns 0 on invalid input. +## + +proc leoDecodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint {. + leo, importc: "leo_decode_work_count".} +## +## leo_decode() +## +## Decode original data from recovery data. +## +## buffer_bytes: Number of bytes in each data buffer. +## original_count: Number of original_data[] buffers provided. +## original_data: Array of pointers to original data buffers. +## recovery_count: Number of recovery_data[] buffers provided. +## recovery_data: Array of pointers to recovery data buffers. +## work_count: Number of work_data[] buffers, from leo_decode_work_count(). +## work_data: Array of pointers to work data buffers. +## +## Lost original/recovery data should be set to NULL. +## +## The sum of recovery_count + the number of non-NULL original data must be at +## least original_count in order to perform recovery. +## +## Returns Leopard_Success on success. +## Returns other values on errors. +## + +proc leoDecode*(bufferBytes: uint64; originalCount: cuint; recoveryCount: cuint; + workCount: cuint; originalData: ptr pointer; + recoveryData: ptr pointer; workData: ptr pointer): LeopardResult {.
+ leo, importc: "leo_decode".} + ## Number of bytes in each data buffer + ## Number of original_data[] buffer pointers + ## Number of recovery_data[] buffer pointers + ## Number of buffer pointers in work_data[] + ## Array of original data buffers + ## Array of recovery data buffers +## Array of work data buffers diff --git a/tests/dagger/testleopard.nim b/tests/dagger/testleopard.nim index 522a014b..1d56f6ed 100644 --- a/tests/dagger/testleopard.nim +++ b/tests/dagger/testleopard.nim @@ -1,168 +1,185 @@ -# import std/os -# import std/random import std/sequtils import std/strformat import pkg/dagger/leopard +import pkg/dagger/rng import pkg/stew/byteutils +import pkg/stew/ptrops + +import pkg/libp2p/varint + +const + LEO_ALIGN_BYTES = 16'u type TestParameters = object - original_count: cuint - recovery_count: cuint - buffer_bytes : cuint - loss_count : cuint - seed : cuint + originalCount: cuint + recoveryCount: cuint + bufferBytes : cuint + lossCount : cuint + seed : cuint - Vec = seq[pointer] +proc randomCRCPacket(rng: Rng, data: var openArray[byte]) = + if data.len < 16: + data[0] = rng.rand(data.len).byte + for i in 1..= LEO_ALIGN_BYTES: + return + + data = cast[pointer](cast[uint](data) - (LEO_ALIGN_BYTES - offset)) + dealloc(data) proc benchmark(params: TestParameters) = - var - # original_data = newSeqWith(params.original_count.int, - # newSeq[byte](params.buffer_bytes)) + let + rng = Rng.instance() + encodeWorkCount = leoEncodeWorkCount( + params.originalCount, + params.recoveryCount) + decodeWorkCount = leoDecodeWorkCount( + params.originalCount, + params.recoveryCount) - # original_data_0 = newSeqWith(params.original_count.int, hello.toBytes) - # original_data = Vec.new(original_data_0) - - hello01 = "hello world01 " - hello02 = "hello world02 " - hello03 = "hello world03 " - hello04 = "hello world04 " - hello05 = "hello world05 " - hello06 = "hello world06 " - hello07 = "hello world07 " - hello08 = "hello world08 " - hello09 = "hello world09 " - hello10 
= "hello world10 " - hello11 = "hello world11 " - hello12 = "hello world12 " - hello13 = "hello world13 " - hello14 = "hello world14 " - hello15 = "hello world15 " - hello16 = "hello world16 " - hello17 = "hello world17 " - hello18 = "hello world18 " - hello19 = "hello world19 " - hello20 = "hello world20 " - - original_data_0 = @[ - hello01.toBytes, - hello02.toBytes, - hello03.toBytes, - hello04.toBytes, - hello05.toBytes, - hello06.toBytes, - hello07.toBytes, - hello08.toBytes, - hello09.toBytes, - hello10.toBytes, - hello11.toBytes, - hello12.toBytes, - hello13.toBytes, - hello14.toBytes, - hello15.toBytes, - hello16.toBytes, - hello17.toBytes, - hello18.toBytes, - hello19.toBytes, - hello20.toBytes - ] - - original_data = Vec.new(original_data_0) - - # debugEcho $original_data_0.mapIt(repr it) - # debugEcho $original_data.mapIt(repr cast[seq[byte]](cast[pointer](cast[int](it) - 16))) - debugEcho $original_data.mapIt(cast[int](it)) + debugEcho "original work count: " & $params.originalCount + debugEcho "encode work count: " & $encodeWorkCount + debugEcho "decode work count: " & $decodeWorkCount let - encode_work_count = leo_encode_work_count(params.original_count, - params.recovery_count) - decode_work_count = leo_decode_work_count(params.original_count, - params.recovery_count) + totalBytes = (params.buffer_bytes * params.originalCount).uint64 - debugEcho "encode_work_count: " & $encode_work_count - debugEcho "decode_work_count: " & $decode_work_count - - let - total_bytes = (params.buffer_bytes * params.original_count).uint64 - - debugEcho "total_bytes: " & $total_bytes + debugEcho "total_bytes: " & $totalBytes var - encode_work_data_0 = newSeqWith(encode_work_count.int, - newSeq[byte](params.buffer_bytes)) + originalData = newSeq[pointer](params.originalCount) + encodeWorkData = newSeq[pointer](encodeWorkCount) + decodeWorkData = newSeq[pointer](decodeWorkCount) - encode_work_data = Vec.new(encode_work_data_0) + for i in 0..