commit 7b23545c275c2a09668eebe0d9491bbd2d4de2da Author: Chrysostomos Nanakos Date: Mon Jan 5 03:05:14 2026 +0200 initial commit Signed-off-by: Chrysostomos Nanakos diff --git a/LICENSE-APACHEv2 b/LICENSE-APACHEv2 new file mode 100644 index 0000000..782d1bf --- /dev/null +++ b/LICENSE-APACHEv2 @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018 Status Research & Development GmbH + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..e6ba156 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2022 Status Research & Development GmbH + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..cfda258 --- /dev/null +++ b/README.md @@ -0,0 +1,188 @@ +# nim-blockstore + +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Stability: experimental](https://img.shields.io/badge/Stability-experimental-orange.svg)](#stability) +[![nim](https://img.shields.io/badge/nim-2.2.4+-yellow.svg)](https://nim-lang.org/) + +A content-addressed block storage library for Nim with configurable hash algorithms, codecs, and merkle tree proofs for block verification. 
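As a minimal sketch of the block-level API (using only `newBlock`, `verify`, `size`, and the CID stringifier defined in `blockstore/blocks.nim` and `blockstore/cid.nim`; the full dataset-level workflow is shown under Usage below):

```nim
import blockstore

# Create a content-addressed block with the default hash configuration
# (SHA-256 digest, logos-storage-block codec, CIDv1).
let blk = newBlock("hello world").get()
echo "CID:  ", blk.cid   # base32-encoded CIDv1 string
echo "Size: ", blk.size  # payload size in bytes

# verify() recomputes the CID from the stored data and compares it.
assert blk.verify().get()
```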
+ +## Features + +- Content-addressed storage with CIDv1 identifiers +- Configurable hash functions and codecs via `BlockHashConfig` +- Merkle tree proofs for block verification +- Multiple storage backends: + - Block storage: sharded files (`bbSharded`) or packed files (`bbPacked`) + - Merkle tree storage: embedded proofs (`mbEmbeddedProofs`), LevelDB (`mbLevelDb`), or packed files (`mbPacked`) + - Blockmap storage: LevelDB (`bmLevelDb`) or files (`bmFile`) +- Direct I/O support (`ioDirect`) for crash consistency and OS cache bypass +- Buffered I/O mode (`ioBuffered`) with configurable batch sync +- Storage quota management +- Background garbage collection with deletion worker +- Metadata stored in LevelDB +- Async file chunking +- Dataset management with manifests + +## Usage + +### Building a Dataset from a File + +```nim +import blockstore +import taskpools +import std/options + +proc buildDataset() {.async.} = + # Create a shared thread pool for async file I/O + let pool = Taskpool.new(numThreads = 4) + defer: pool.shutdown() + + let store = newDatasetStore("./db", "./blocks").get() + + # Start building a dataset with 64KB chunks + let builder = store.startDataset(64 * 1024, some("myfile.txt")).get() + + # Chunk the file + let stream = (await builder.chunkFile(pool)).get() + + while true: + let blockOpt = await stream.nextBlock() + if blockOpt.isNone: + break + let blockResult = blockOpt.get() + if blockResult.isErr: + echo "Error: ", blockResult.error + break + discard await builder.addBlock(blockResult.value) + + stream.close() + + # Finalize and get the dataset + let dataset = (await builder.finalize()).get() + echo "Tree CID: ", dataset.treeCid + echo "Manifest CID: ", dataset.manifestCid + +waitFor buildDataset() +``` + +### Retrieving Blocks with Proofs + +```nim +import blockstore + +proc getBlockWithProof(store: DatasetStore, treeCid: Cid, index: int) {.async.} = + let datasetOpt = (await store.getDataset(treeCid)).get() + if datasetOpt.isNone: + echo "Dataset not found" + return + + let dataset = datasetOpt.get() + let blockOpt = (await dataset.getBlock(index)).get() + + if blockOpt.isSome: + let (blk, proof) = blockOpt.get() + echo "Block: ", blk + echo "Proof index: ", proof.index + echo "Proof path length: ", proof.path.len +``` + +## API Reference + +### newDatasetStore + +Creates a new dataset store with configurable backends and I/O modes: + +```nim +proc newDatasetStore*( + dbPath: string, # Path to LevelDB database + blocksDir: string, # Directory for block storage + quota: uint64 = 0, # Storage quota (0 = unlimited) + blockHashConfig: BlockHashConfig = defaultBlockHashConfig(), + merkleBackend: MerkleBackend = mbPacked, # Merkle tree storage backend + blockBackend: BlockBackend = bbSharded, # Block storage backend + blockmapBackend: BlockmapBackend = bmLevelDb, # Blockmap storage backend + ioMode: IOMode = ioDirect, # I/O mode + syncBatchSize: int = 0, # Batch size for sync (buffered mode) + pool: Taskpool = nil # Thread pool for deletion worker +): BResult[DatasetStore] +``` + +### Storage Backends + +#### BlockBackend + +| Value | Description | +|-------|-------------| +| `bbSharded` | Sharded directory structure (default). One file per block with 2-level sharding. | +| `bbPacked` | Packed file format. All blocks for a dataset in a single file. | + +#### MerkleBackend + +Controls how merkle proofs are stored and generated. 
+ +| Value | Description | +|-------|-------------| +| `mbEmbeddedProofs` | Proofs computed during build (tree in memory) and embedded in block references in LevelDB. Tree discarded after finalize. Good for smaller datasets. | +| `mbLevelDb` | Tree nodes stored in LevelDB. Proofs generated on-demand from stored tree. | +| `mbPacked` | Tree nodes in packed files (default). One file per tree. Proofs generated on-demand. Efficient for large datasets. | + +#### BlockmapBackend + +| Value | Description | +|-------|-------------| +| `bmLevelDb` | LevelDB storage (default). Shared with metadata. | +| `bmFile` | File-based storage. One file per blockmap. | + +### I/O Modes + +| Value | Description | +|-------|-------------| +| `ioDirect` | Direct I/O (default). Bypasses OS cache, data written directly to disk. Provides crash consistency. | +| `ioBuffered` | Buffered I/O. Uses OS cache. Use `syncBatchSize` to control sync frequency if needed. | + +### BlockHashConfig + +Configuration for block hashing and CID generation: + +| Field | Type | Description | +|-------|------|-------------| +| `hashFunc` | `HashFunc` | Hash function `proc(data: openArray[byte]): HashDigest` | +| `hashCode` | `MultiCodec` | Multicodec identifier for the hash (e.g., `Sha256Code`) | +| `blockCodec` | `MultiCodec` | Codec for blocks (e.g., `LogosStorageBlock`) | +| `treeCodec` | `MultiCodec` | Codec for merkle tree CIDs (e.g., `LogosStorageTree`) | + +## Running Tests + +```bash +nimble test +``` + +## Code Coverage + +Generate HTML coverage reports: + +```bash +# All tests +nimble coverage + +# Individual test suites +nimble coverage_merkle +nimble coverage_block +nimble coverage_chunker + +# Clean coverage data +nimble coverage_clean +``` + +## Stability + +This library is in experimental status and may have breaking changes between versions until it stabilizes. + +## License + +nim-blockstore is licensed and distributed under either of: + +* Apache License, Version 2.0: [LICENSE-APACHEv2](LICENSE-APACHEv2) or https://opensource.org/licenses/Apache-2.0 +* MIT license: [LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT + +at your option. The contents of this repository may not be copied, modified, or distributed except according to those terms. 
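For reference, the backend and I/O options documented in the API Reference section combine as in the following sketch. The paths, quota, and specific backend choices here are illustrative only, and the extra imports are included on the assumption that the `IOMode` and `BlockmapBackend` enums (defined in `blockstore/ioutils.nim` and `blockstore/blockmap.nim`) may not be re-exported by the top-level module:

```nim
import blockstore
# Direct imports in case the top-level module does not re-export
# IOMode / BlockmapBackend.
import blockstore/[ioutils, blockmap]

# Open a store with non-default choices: packed block files, file-based
# blockmaps, and buffered I/O with syncBatchSize controlling sync
# frequency. quota = 0 means unlimited.
let store = newDatasetStore(
  dbPath = "./db",
  blocksDir = "./blocks",
  quota = 0,
  merkleBackend = mbPacked,
  blockBackend = bbPacked,
  blockmapBackend = bmFile,
  ioMode = ioBuffered,
  syncBatchSize = 128
).get()
```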
diff --git a/blockstore.nim b/blockstore.nim new file mode 100644 index 0000000..68bf2f5 --- /dev/null +++ b/blockstore.nim @@ -0,0 +1,2 @@ +import blockstore/blockstore +export blockstore diff --git a/blockstore.nimble b/blockstore.nimble new file mode 100644 index 0000000..d6f121f --- /dev/null +++ b/blockstore.nimble @@ -0,0 +1,116 @@ +# Package + +version = "0.1.0" +author = "Status Research & Development GmbH" +description = "Nim blockstore" +license = "Apache License 2.0 or MIT" +srcDir = "blockstore" + +requires "nim >= 2.2.4" +requires "nimcrypto >= 0.6.0" +requires "leveldbstatic >= 0.1.0" +requires "results >= 0.4.0" +requires "chronos >= 4.0.0" +requires "libp2p >= 1.14.1 & < 2.0.0" +requires "constantine >= 0.2.0" +requires "taskpools >= 0.0.5" +requires "hashlib >= 1.0.0" + +task test, "Run the test suite": + exec "nim c -r tests/test_block.nim" + exec "nim c -r tests/test_merkle.nim" + exec "nim c -r tests/test_chunker.nim" + exec "nim c -r tests/test_dataset.nim" + +task test_constantine, "Run the test suite": + exec "nim c -d:useConstantine -r tests/test_block.nim" + exec "nim c -d:useConstantine -r tests/test_merkle.nim" + exec "nim c -d:useConstantine -r tests/test_chunker.nim" + exec "nim c -d:useConstantine -r tests/test_dataset.nim" + +task test_blake3, "Run the test suite": + exec "nim c -d:useBlake3 -r tests/test_block.nim" + exec "nim c -d:useBlake3 -r tests/test_merkle.nim" + exec "nim c -d:useBlake3 -r tests/test_chunker.nim" + exec "nim c -d:useBlake3 -r tests/test_dataset.nim" + +task test_clean, "Clean test binaries": + exec "rm tests/test_block" + exec "rm tests/test_merkle" + exec "rm tests/test_chunker" + exec "rm tests/test_dataset" + +task benchmark, "Compile dataset benchmark": + exec "nim c --hints:off -d:release -r tests/bench_dataset.nim" + +task benchmark_constantine, "Compile dataset benchmark with constantine": + exec "nim c --hints:off -d:release -d:useConstantine -r tests/bench_dataset.nim" + +task benchmark_blake3, "Compile dataset benchmark with BLAKE3": + exec "nim c --hints:off -d:release -d:useBlake3 -r tests/bench_dataset.nim" + +task benchmark_merkle, "Compile merkle benchmark": + exec "nim c --hints:off -d:release -r tests/bench_merkle.nim" + +task benchmark_merkle_constantine, "Compile merkle benchmark": + exec "nim c --hints:off -d:release -d:useConstantine -r tests/bench_merkle.nim" + +task benchmark_merkle_blake3, "Compile merkle benchmark": + exec "nim c --hints:off -d:release -d:useBlake3 -r tests/bench_merkle.nim" + +const + nimcacheBase = ".nimcache" + coverageFlags = "--passC:\"-fprofile-arcs -ftest-coverage\" --passL:\"-fprofile-arcs -ftest-coverage\"" + coverageDir = "coverage_report" + +proc runCoverage(testFile: string, reportName: string) = + let nimcacheDir = nimcacheBase & "/" & reportName + exec "nim c " & coverageFlags & " --nimcache:" & nimcacheDir & " -r tests/" & testFile & ".nim" + exec "lcov --capture --directory " & nimcacheDir & " --output-file " & reportName & ".info --quiet" + exec "lcov --extract " & reportName & ".info '*@sblockstore@s*' --output-file " & reportName & "_filtered.info --quiet" + exec "genhtml " & reportName & "_filtered.info --output-directory " & coverageDir & "/" & reportName & " --quiet" + exec "rm -f " & reportName & ".info " & reportName & "_filtered.info" + echo "Coverage report: " & coverageDir & "/" & reportName & "/index.html" + +task coverage, "Run all tests with coverage and generate HTML report": + mkDir(coverageDir) + mkDir(nimcacheBase) + exec "nim c " & coverageFlags & " 
--nimcache:" & nimcacheBase & "/test_block -r tests/test_block.nim" + exec "nim c " & coverageFlags & " --nimcache:" & nimcacheBase & "/test_merkle -r tests/test_merkle.nim" + exec "nim c " & coverageFlags & " --nimcache:" & nimcacheBase & "/test_chunker -r tests/test_chunker.nim" + exec "lcov --capture --directory " & nimcacheBase & "/test_block --directory " & nimcacheBase & "/test_merkle --directory " & nimcacheBase & "/test_chunker --output-file all_coverage.info --quiet" + exec "lcov --extract all_coverage.info '*@sblockstore@s*' --output-file blockstore_coverage.info --quiet" + exec "genhtml blockstore_coverage.info --output-directory " & coverageDir & "/all --quiet" + exec "rm -f all_coverage.info blockstore_coverage.info" + echo "Coverage report: " & coverageDir & "/all/index.html" + +task coverage_merkle, "Run merkle tests with coverage": + mkDir(coverageDir) + mkDir(nimcacheBase) + runCoverage("test_merkle", "test_merkle") + +task coverage_block, "Run block tests with coverage": + mkDir(coverageDir) + mkDir(nimcacheBase) + runCoverage("test_block", "test_block") + +task coverage_chunker, "Run chunker tests with coverage": + mkDir(coverageDir) + mkDir(nimcacheBase) + runCoverage("test_chunker", "test_chunker") + +task coverage_bench_merkle, "Run merkle benchmark with coverage": + mkDir(coverageDir) + mkDir(nimcacheBase) + let nimcacheDir = nimcacheBase & "/bench_merkle" + exec "nim c " & coverageFlags & " --nimcache:" & nimcacheDir & " -r tests/bench_merkle.nim --size=100MB" + exec "lcov --capture --directory " & nimcacheDir & " --output-file bench_merkle.info --quiet" + exec "lcov --extract bench_merkle.info '*@sblockstore@s*' --output-file bench_merkle_filtered.info --quiet" + exec "genhtml bench_merkle_filtered.info --output-directory " & coverageDir & "/bench_merkle --quiet" + exec "rm -f bench_merkle.info bench_merkle_filtered.info" + echo "Coverage report: " & coverageDir & "/bench_merkle/index.html" + +task coverage_clean, "Clean coverage data and reports": + exec "rm -rf " & coverageDir + exec "rm -rf " & nimcacheBase + echo "Coverage data cleaned" diff --git a/blockstore/blockmap.nim b/blockstore/blockmap.nim new file mode 100644 index 0000000..ab33c5a --- /dev/null +++ b/blockstore/blockmap.nim @@ -0,0 +1,497 @@ +import std/[os, bitops, memfiles, posix] +import results + +import ./errors +import ./sharding +import ./cid + +proc newBlockmap*(size: int): seq[byte] = + let byteCount = (size + 7) div 8 + newSeq[byte](byteCount) + +proc blockmapGet*(blockmap: seq[byte], index: int): bool = + if index < 0: + return false + let + byteIdx = index div 8 + bitIdx = index mod 8 + if byteIdx >= blockmap.len: + return false + (blockmap[byteIdx] and (1'u8 shl bitIdx)) != 0 + +proc blockmapSet*(blockmap: var seq[byte], index: int, value: bool) = + if index < 0: + return + let + byteIdx = index div 8 + bitIdx = index mod 8 + if byteIdx >= blockmap.len: + return + if value: + blockmap[byteIdx] = blockmap[byteIdx] or (1'u8 shl bitIdx) + else: + blockmap[byteIdx] = blockmap[byteIdx] and not (1'u8 shl bitIdx) + +proc blockmapCountOnes*(blockmap: seq[byte]): int = + result = 0 + for b in blockmap: + result += countSetBits(b) + +const + BlockmapMagic = 0x424D4150'u32 + BlockmapVersion = 1'u8 + BlocksPerChunk* = 1024 * 1024 + GrowthChunk = 1024 * 1024 + HeaderSize = 24 + ChunkEmpty* = 0x00'u8 + ChunkFull* = 0xFF'u8 + ChunkPartial* = 0x01'u8 + +type + BlockmapBackend* = enum + bmLevelDb + bmFile + + FileBlockmap* = ref object + path: string + file: MemFile + fileSize: int + maxIndex: uint64 + 
indexSize: uint32 + readOnly: bool + + BlockRange* = object + start*: uint64 + count*: uint64 + +proc headerMaxIndex(mem: pointer): ptr uint64 {.inline.} = + cast[ptr uint64](cast[uint](mem) + 8) + +proc headerIndexSize(mem: pointer): ptr uint32 {.inline.} = + cast[ptr uint32](cast[uint](mem) + 16) + +proc indexOffset(): int {.inline.} = + HeaderSize + +proc bitmapOffset(indexSize: uint32): int {.inline.} = + HeaderSize + indexSize.int + +proc chunkIndexPtr(bm: FileBlockmap, chunkIdx: uint32): ptr uint8 {.inline.} = + if chunkIdx >= bm.indexSize: + return nil + cast[ptr uint8](cast[uint](bm.file.mem) + indexOffset().uint + chunkIdx.uint) + +proc bitmapBytePtr(bm: FileBlockmap, byteIdx: uint64): ptr uint8 {.inline.} = + let offset = bitmapOffset(bm.indexSize).uint64 + byteIdx + if offset.int >= bm.fileSize: + return nil + cast[ptr uint8](cast[uint](bm.file.mem) + offset.uint) + +proc getChunkState*(bm: FileBlockmap, chunkIdx: uint32): uint8 = + let p = bm.chunkIndexPtr(chunkIdx) + if p == nil: + return ChunkEmpty + p[] + +proc setChunkState(bm: FileBlockmap, chunkIdx: uint32, state: uint8) = + let p = bm.chunkIndexPtr(chunkIdx) + if p != nil: + p[] = state + +proc neededFileSize(blockIndex: uint64, currentIndexSize: uint32): tuple[fileSize: int, indexSize: uint32] = + let chunkIdx = (blockIndex div BlocksPerChunk).uint32 + 1 + let newIndexSize = max(currentIndexSize, chunkIdx) + let byteIdx = blockIndex div 8 + let bitmapEnd = bitmapOffset(newIndexSize) + byteIdx.int + 1 + let fileSize = ((bitmapEnd + GrowthChunk - 1) div GrowthChunk) * GrowthChunk + (fileSize, newIndexSize) + +proc growFile(bm: FileBlockmap, newSize: int, newIndexSize: uint32): BResult[void] = + if bm.readOnly: + return err(ioError("Cannot grow read-only blockmap")) + + var oldBitmapData: seq[byte] = @[] + let oldIndexSize = bm.indexSize + let oldBitmapOffset = bitmapOffset(oldIndexSize) + let newBitmapOffset = bitmapOffset(newIndexSize) + + if newIndexSize > oldIndexSize and oldIndexSize > 0: + let bitmapSize = bm.fileSize - oldBitmapOffset + if bitmapSize > 0: + oldBitmapData = newSeq[byte](bitmapSize) + copyMem(addr oldBitmapData[0], cast[pointer](cast[uint](bm.file.mem) + oldBitmapOffset.uint), bitmapSize) + + bm.file.close() + + try: + let fd = posix.open(bm.path.cstring, O_RDWR) + if fd < 0: + return err(ioError("Failed to open file for truncate")) + if ftruncate(fd, newSize.Off) != 0: + discard posix.close(fd) + return err(ioError("Failed to truncate file")) + discard posix.close(fd) + except OSError as e: + return err(ioError("Failed to grow file: " & e.msg)) + + try: + bm.file = memfiles.open(bm.path, fmReadWrite, mappedSize = newSize) + except OSError as e: + return err(ioError("Failed to remap file: " & e.msg)) + + bm.fileSize = newSize + headerIndexSize(bm.file.mem)[] = newIndexSize + + for i in oldIndexSize ..< newIndexSize: + let p = cast[ptr uint8](cast[uint](bm.file.mem) + indexOffset().uint + i.uint) + p[] = ChunkEmpty + + if oldBitmapData.len > 0: + copyMem(cast[pointer](cast[uint](bm.file.mem) + newBitmapOffset.uint), addr oldBitmapData[0], oldBitmapData.len) + if newBitmapOffset > oldBitmapOffset: + let gapSize = min(newBitmapOffset - oldBitmapOffset, oldBitmapData.len) + zeroMem(cast[pointer](cast[uint](bm.file.mem) + oldBitmapOffset.uint), gapSize) + + bm.indexSize = newIndexSize + ok() + +proc ensureCapacity(bm: FileBlockmap, blockIndex: uint64): BResult[void] = + let (neededSize, neededIndexSize) = neededFileSize(blockIndex, bm.indexSize) + if neededSize <= bm.fileSize and neededIndexSize <= 
bm.indexSize: + return ok() + ?bm.growFile(max(neededSize, bm.fileSize), max(neededIndexSize, bm.indexSize)) + ok() + +proc get*(bm: FileBlockmap, index: uint64): bool {.inline.} = + if index >= bm.maxIndex: + return false + + let chunkIdx = (index div BlocksPerChunk).uint32 + let chunkState = bm.getChunkState(chunkIdx) + + if chunkState == ChunkEmpty: + return false + if chunkState == ChunkFull: + return true + + let byteIdx = index div 8 + let bitIdx = index mod 8 + let p = bm.bitmapBytePtr(byteIdx) + if p == nil: + return false + (p[] and (1'u8 shl bitIdx)) != 0 + +proc set*(bm: FileBlockmap, index: uint64): BResult[void] = + if bm.readOnly: + return err(ioError("Cannot write to read-only blockmap")) + + ?bm.ensureCapacity(index) + + let chunkIdx = (index div BlocksPerChunk).uint32 + let chunkState = bm.getChunkState(chunkIdx) + + if chunkState == ChunkFull: + return ok() + + let byteIdx = index div 8 + let bitIdx = index mod 8 + let p = bm.bitmapBytePtr(byteIdx) + if p != nil: + p[] = p[] or (1'u8 shl bitIdx) + + if chunkState == ChunkEmpty: + bm.setChunkState(chunkIdx, ChunkPartial) + + if index + 1 > bm.maxIndex: + bm.maxIndex = index + 1 + headerMaxIndex(bm.file.mem)[] = bm.maxIndex + + ok() + +proc clear*(bm: FileBlockmap, index: uint64): BResult[void] = + if bm.readOnly: + return err(ioError("Cannot write to read-only blockmap")) + + if index >= bm.maxIndex: + return ok() + + let chunkIdx = (index div BlocksPerChunk).uint32 + let chunkState = bm.getChunkState(chunkIdx) + + if chunkState == ChunkEmpty: + return ok() + + let byteIdx = index div 8 + let bitIdx = index mod 8 + let p = bm.bitmapBytePtr(byteIdx) + if p != nil: + p[] = p[] and not (1'u8 shl bitIdx) + + if chunkState == ChunkFull: + bm.setChunkState(chunkIdx, ChunkPartial) + + ok() + +proc countChunkBits(bm: FileBlockmap, chunkIdx: uint32): int = + let startBlock = chunkIdx.uint64 * BlocksPerChunk + let endBlock = min(startBlock + BlocksPerChunk, bm.maxIndex) + if startBlock >= endBlock: + return 0 + + let startByte = startBlock div 8 + let endByte = (endBlock + 7) div 8 + + result = 0 + for i in startByte ..< endByte: + let p = bm.bitmapBytePtr(i) + if p != nil: + result += countSetBits(p[]) + +proc compactIndex*(bm: FileBlockmap) = + if bm.readOnly: + return + + for i in 0'u32 ..< bm.indexSize: + let state = bm.getChunkState(i) + if state == ChunkPartial: + let bits = bm.countChunkBits(i) + let startBlock = i.uint64 * BlocksPerChunk + let blocksInChunk = min(BlocksPerChunk.uint64, bm.maxIndex - startBlock).int + + if bits == 0: + bm.setChunkState(i, ChunkEmpty) + elif bits == blocksInChunk: + bm.setChunkState(i, ChunkFull) + +proc countOnes*(bm: FileBlockmap): uint64 = + result = 0 + for i in 0'u32 ..< bm.indexSize: + let state = bm.getChunkState(i) + case state + of ChunkEmpty: + discard + of ChunkFull: + let startBlock = i.uint64 * BlocksPerChunk + result += min(BlocksPerChunk.uint64, bm.maxIndex - startBlock) + else: + result += bm.countChunkBits(i).uint64 + +proc isComplete*(bm: FileBlockmap, totalBlocks: uint64): bool = + if bm.maxIndex < totalBlocks: + return false + let neededChunks = ((totalBlocks + BlocksPerChunk - 1) div BlocksPerChunk).uint32 + for i in 0'u32 ..< neededChunks: + if bm.getChunkState(i) != ChunkFull: + return false + true + +proc isEmpty*(bm: FileBlockmap): bool = + for i in 0'u32 ..< bm.indexSize: + if bm.getChunkState(i) != ChunkEmpty: + return false + true + +proc maxBlockIndex*(bm: FileBlockmap): uint64 = + bm.maxIndex + +proc toRanges*(bm: FileBlockmap): seq[BlockRange] = + result = @[] + 
if bm.indexSize == 0: + return + + var currentStart: uint64 = 0 + var inRange = false + + for i in 0'u32 ..< bm.indexSize: + let state = bm.getChunkState(i) + let chunkStart = i.uint64 * BlocksPerChunk + let chunkEnd = min(chunkStart + BlocksPerChunk, bm.maxIndex) + + case state + of ChunkFull: + if not inRange: + currentStart = chunkStart + inRange = true + + if i == bm.indexSize - 1 or bm.getChunkState(i + 1) != ChunkFull: + result.add(BlockRange(start: currentStart, count: chunkEnd - currentStart)) + inRange = false + + of ChunkEmpty: + if inRange: + result.add(BlockRange(start: currentStart, count: chunkStart - currentStart)) + inRange = false + + of ChunkPartial: + if inRange: + result.add(BlockRange(start: currentStart, count: chunkStart - currentStart)) + inRange = false + + var j = chunkStart + while j < chunkEnd: + if bm.get(j): + let rangeStart = j + while j < chunkEnd and bm.get(j): + inc j + result.add(BlockRange(start: rangeStart, count: j - rangeStart)) + else: + inc j + + else: + discard + +proc flush*(bm: FileBlockmap) = + if not bm.readOnly: + bm.file.flush() + +proc close*(bm: FileBlockmap) = + if bm.file.mem != nil: + bm.flush() + bm.file.close() + +proc setAll*(bm: FileBlockmap, totalBlocks: uint64): BResult[void] = + if bm.readOnly: + return err(ioError("Cannot write to read-only blockmap")) + + if totalBlocks == 0: + return ok() + + ?bm.ensureCapacity(totalBlocks - 1) + + let fullBytes = totalBlocks div 8 + let remainderBits = totalBlocks mod 8 + + for i in 0'u64 ..< fullBytes: + let p = bm.bitmapBytePtr(i) + if p != nil: + p[] = 0xFF'u8 + + if remainderBits > 0: + let p = bm.bitmapBytePtr(fullBytes) + if p != nil: + p[] = (1'u8 shl remainderBits) - 1 + + bm.maxIndex = totalBlocks + headerMaxIndex(bm.file.mem)[] = totalBlocks + + let chunkCount = ((totalBlocks + BlocksPerChunk - 1) div BlocksPerChunk).uint32 + for i in 0'u32 ..< chunkCount: + bm.setChunkState(i, ChunkFull) + + ok() + +proc finalize*(bm: FileBlockmap, totalBlocks: uint64): BResult[void] = + if bm.readOnly: + return ok() + + if totalBlocks > bm.maxIndex: + bm.maxIndex = totalBlocks + headerMaxIndex(bm.file.mem)[] = totalBlocks + + bm.compactIndex() + bm.flush() + ok() + +proc newFileBlockmap*(path: string, forWriting: bool = true): BResult[FileBlockmap] = + let parentDir = parentDir(path) + if not dirExists(parentDir): + try: + createDir(parentDir) + except OSError as e: + return err(ioError("Failed to create directory: " & e.msg)) + + var isNew = not fileExists(path) + + if isNew and not forWriting: + return err(ioError("Blockmap file does not exist: " & path)) + + var initialSize = HeaderSize + GrowthChunk + + if isNew: + try: + let fd = posix.open(path.cstring, O_RDWR or O_CREAT, 0o644) + if fd < 0: + return err(ioError("Failed to create blockmap file")) + if ftruncate(fd, initialSize.Off) != 0: + discard posix.close(fd) + return err(ioError("Failed to set initial file size")) + discard posix.close(fd) + except OSError as e: + return err(ioError("Failed to create blockmap file: " & e.msg)) + else: + try: + initialSize = getFileSize(path).int + except OSError as e: + return err(ioError("Failed to get file size: " & e.msg)) + + let mode = if forWriting: fmReadWrite else: fmRead + var mf: MemFile + try: + mf = memfiles.open(path, mode, mappedSize = initialSize) + except OSError as e: + return err(ioError("Failed to mmap blockmap: " & e.msg)) + + var bm = FileBlockmap( + path: path, + file: mf, + fileSize: initialSize, + maxIndex: 0, + indexSize: 0, + readOnly: not forWriting + ) + + if isNew: + let 
header = cast[ptr uint32](mf.mem) + header[] = BlockmapMagic + cast[ptr uint8](cast[uint](mf.mem) + 4)[] = BlockmapVersion + headerMaxIndex(mf.mem)[] = 0 + headerIndexSize(mf.mem)[] = 0 + else: + let magic = cast[ptr uint32](mf.mem)[] + if magic != BlockmapMagic: + mf.close() + return err(ioError("Invalid blockmap magic")) + let version = cast[ptr uint8](cast[uint](mf.mem) + 4)[] + if version != BlockmapVersion: + mf.close() + return err(ioError("Unsupported blockmap version")) + bm.maxIndex = headerMaxIndex(mf.mem)[] + bm.indexSize = headerIndexSize(mf.mem)[] + + ok(bm) + +proc getBlockmapPath*(blockmapsDir: string, treeCid: Cid): string = + getShardedPath(blockmapsDir, treeCid, ".blkmap") + +proc getBlockmapPathStr*(blockmapsDir: string, treeCidStr: string): string = + getShardedPathStr(blockmapsDir, treeCidStr, ".blkmap") + +proc toSeqByte*(bm: FileBlockmap): seq[byte] = + let bitmapSize = (bm.maxIndex + 7) div 8 + result = newSeq[byte](bitmapSize.int) + for i in 0'u64 ..< bitmapSize: + let p = bm.bitmapBytePtr(i) + if p != nil: + result[i.int] = p[] + +proc fromSeqByte*(bm: FileBlockmap, data: seq[byte]): BResult[void] = + if bm.readOnly: + return err(ioError("Cannot write to read-only blockmap")) + + let maxIndex = data.len.uint64 * 8 + ?bm.ensureCapacity(maxIndex - 1) + + for i in 0'u64 ..< data.len.uint64: + let p = bm.bitmapBytePtr(i) + if p != nil: + p[] = data[i.int] + + bm.maxIndex = maxIndex + headerMaxIndex(bm.file.mem)[] = maxIndex + + let chunkCount = ((maxIndex + BlocksPerChunk - 1) div BlocksPerChunk).uint32 + for i in 0'u32 ..< chunkCount: + bm.setChunkState(i, ChunkPartial) + bm.compactIndex() + + ok() diff --git a/blockstore/blocks.nim b/blockstore/blocks.nim new file mode 100644 index 0000000..faffb25 --- /dev/null +++ b/blockstore/blocks.nim @@ -0,0 +1,92 @@ +import std/hashes +import results +import libp2p/multicodec + +import ./errors +import ./cid +import ./sha256 + +type + HashDigest* = array[32, byte] + HashFunc* = proc(data: openArray[byte]): HashDigest {.noSideEffect, gcsafe, raises: [].} + + BlockHashConfig* = object + hashFunc*: HashFunc + hashCode*: MultiCodec + blockCodec*: MultiCodec + treeCodec*: MultiCodec + + Block* = ref object + cid*: Cid + data*: seq[byte] + + BlockMetadata* = object + cid*: string + size*: int + index*: int + +proc sha256HashFunc*(data: openArray[byte]): HashDigest {.noSideEffect, gcsafe, raises: [].} = + sha256Hash(data) + +proc defaultBlockHashConfig*(): BlockHashConfig {.gcsafe.} = + BlockHashConfig( + hashFunc: sha256HashFunc, + hashCode: Sha256Code, + blockCodec: LogosStorageBlock, + treeCodec: LogosStorageTree + ) + +proc computeCid*(data: openArray[byte], config: BlockHashConfig): BResult[Cid] = + let + hash = config.hashFunc(data) + mh = ?wrap(config.hashCode, hash) + newCidV1(config.blockCodec, mh) + +proc computeCid*(data: openArray[byte]): BResult[Cid] = + computeCid(data, defaultBlockHashConfig()) + +proc newBlock*(data: seq[byte], config: BlockHashConfig): BResult[Block] = + let c = ?computeCid(data, config) + var blk = new(Block) + blk.cid = c + blk.data = data + ok(blk) + +proc newBlock*(data: seq[byte]): BResult[Block] = + newBlock(data, defaultBlockHashConfig()) + +proc newBlock*(data: string, config: BlockHashConfig): BResult[Block] = + newBlock(cast[seq[byte]](data), config) + +proc newBlock*(data: string): BResult[Block] = + newBlock(cast[seq[byte]](data), defaultBlockHashConfig()) + +proc fromCidUnchecked*(cid: Cid, data: seq[byte]): Block = + var blk = new(Block) + blk.cid = cid + blk.data = data + blk + +proc 
verify*(b: Block): BResult[bool] = + let computed = ?computeCid(b.data) + ok(computed == b.cid) + +proc size*(b: Block): int {.inline.} = + b.data.len + +proc `==`*(a, b: Block): bool = + a.cid == b.cid and a.data == b.data + +proc hash*(b: Block): Hash = + var h: Hash = 0 + h = h !& hash(b.cid.toBytes()) + !$h + +proc newBlockMetadata*(cid: Cid, size: int, index: int): BlockMetadata = + BlockMetadata(cid: $cid, size: size, index: index) + +proc `$`*(b: Block): string = + "Block(" & $b.cid & ", size=" & $b.size & ")" + +proc `$`*(m: BlockMetadata): string = + "BlockMetadata(cid=" & m.cid & ", size=" & $m.size & ", index=" & $m.index & ")" diff --git a/blockstore/blockstore.nim b/blockstore/blockstore.nim new file mode 100644 index 0000000..e03ff32 --- /dev/null +++ b/blockstore/blockstore.nim @@ -0,0 +1,23 @@ +import ./errors +import ./cid +import ./blocks +import ./serialization +import ./merkle +import ./chunker +import ./manifest +import ./repostore +import ./dataset + +export errors +export cid +export blocks +export serialization +export merkle +export chunker +export manifest +export repostore +export dataset + +const + BlockstoreVersion* = "0.1.0" + BlockstoreDescription* = "Nim blockstore" diff --git a/blockstore/chunker.nim b/blockstore/chunker.nim new file mode 100644 index 0000000..61a3291 --- /dev/null +++ b/blockstore/chunker.nim @@ -0,0 +1,261 @@ +import std/[os, options] +import chronos +import chronos/threadsync +import taskpools +import results + +import ./errors +import ./blocks as blk + +when defined(posix): + import std/posix + +when defined(windows): + import std/winlean + +const + DefaultChunkSize* = 64 * 1024 + MinPoolSize* = 2 #TODO cnanakos: figure what happens when 1 + +type + ChunkerConfig* = object + chunkSize*: int + + ReadResult = object + bytesRead: int + hasError: bool + error: string + + AsyncChunker* = ref object + config: ChunkerConfig + pool: Taskpool + ownsPool: bool + + AsyncChunkStream* = ref object + filePath: string + fd: cint + chunkSize: int + offset: int64 + index: int + finished: bool + pool: Taskpool + buffer: seq[byte] + + SyncChunker* = ref object + config: ChunkerConfig + + SyncChunkIterator* = ref object + file: File + chunkSize: int + buffer: seq[byte] + index: int + finished: bool + + +proc newChunkerConfig*(chunkSize: int = DefaultChunkSize): ChunkerConfig = + ChunkerConfig(chunkSize: chunkSize) + +proc defaultChunkerConfig*(): ChunkerConfig = + ChunkerConfig(chunkSize: DefaultChunkSize) + +proc newAsyncChunker*(pool: Taskpool): AsyncChunker = + AsyncChunker( + config: defaultChunkerConfig(), + pool: pool, + ownsPool: false + ) + +proc newAsyncChunker*(pool: Taskpool, config: ChunkerConfig): AsyncChunker = + AsyncChunker( + config: config, + pool: pool, + ownsPool: false + ) + +proc chunkSize*(chunker: AsyncChunker): int {.inline.} = + chunker.config.chunkSize + +proc shutdown*(chunker: AsyncChunker) = + if chunker.ownsPool: + chunker.pool.shutdown() + +proc readChunkWorker(fd: cint, offset: int64, size: int, + buffer: ptr byte, + signal: ThreadSignalPtr, + resultPtr: ptr ReadResult) {.gcsafe.} = + when defined(posix): + let bytesRead = pread(fd, buffer, size, offset.Off) + if bytesRead < 0: + resultPtr[].hasError = true + resultPtr[].error = "Read error: " & $strerror(errno) + else: + resultPtr[].bytesRead = bytesRead.int + resultPtr[].hasError = false + elif defined(windows): + var + overlapped: OVERLAPPED + bytesRead: DWORD + overlapped.Offset = cast[DWORD](offset and 0xFFFFFFFF'i64) + overlapped.OffsetHigh = cast[DWORD](offset shr 
32) + let success = readFile(fd.Handle, buffer, size.DWORD, addr bytesRead, addr overlapped) + if success == 0: + resultPtr[].hasError = true + resultPtr[].error = "Read error" + else: + resultPtr[].bytesRead = bytesRead.int + resultPtr[].hasError = false + else: + {.error: "Unsupported platform".} + + discard signal.fireSync() + +proc chunkFile*(chunker: AsyncChunker, filePath: string): Future[BResult[AsyncChunkStream]] {.async.} = + if not fileExists(filePath): + return err(ioError("File not found: " & filePath)) + + when defined(posix): + let fd = open(filePath.cstring, O_RDONLY) + if fd < 0: + return err(ioError("Cannot open file: " & filePath)) + elif defined(windows): + let fd = createFileA(filePath.cstring, GENERIC_READ, FILE_SHARE_READ, + nil, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0) + if fd == INVALID_HANDLE_VALUE: + return err(ioError("Cannot open file: " & filePath)) + else: + {.error: "Unsupported platform".} + + let stream = AsyncChunkStream( + filePath: filePath, + fd: fd.cint, + chunkSize: chunker.config.chunkSize, + offset: 0, + index: 0, + finished: false, + pool: chunker.pool, + buffer: newSeq[byte](chunker.config.chunkSize) + ) + return ok(stream) + +proc currentIndex*(stream: AsyncChunkStream): int {.inline.} = + stream.index + +proc isFinished*(stream: AsyncChunkStream): bool {.inline.} = + stream.finished + +proc nextBlock*(stream: AsyncChunkStream): Future[Option[BResult[blk.Block]]] {.async.} = + if stream.finished: + return none(BResult[blk.Block]) + + let signalResult = ThreadSignalPtr.new() + if signalResult.isErr: + stream.finished = true + return some(BResult[blk.Block].err(ioError("Failed to create signal"))) + + let signal = signalResult.get() + var readResult: ReadResult + + stream.pool.spawn readChunkWorker(stream.fd, stream.offset, stream.chunkSize, + addr stream.buffer[0], signal, addr readResult) + + try: + await signal.wait() + except AsyncError as e: + discard signal.close() + stream.finished = true + return some(BResult[blk.Block].err(ioError("Signal wait failed: " & e.msg))) + except CancelledError: + discard signal.close() + stream.finished = true + return some(BResult[blk.Block].err(ioError("Operation cancelled"))) + + discard signal.close() + + if readResult.hasError: + stream.finished = true + return some(BResult[blk.Block].err(ioError(readResult.error))) + + if readResult.bytesRead == 0: + stream.finished = true + return none(BResult[blk.Block]) + + let + data = stream.buffer[0 ..< readResult.bytesRead] + blockResult = blk.newBlock(data) + + stream.offset += readResult.bytesRead + stream.index += 1 + + return some(blockResult) + +proc close*(stream: AsyncChunkStream) = + if not stream.finished: + when defined(posix): + discard posix.close(stream.fd) + elif defined(windows): + discard closeHandle(stream.fd.Handle) + stream.finished = true + +proc newSyncChunker*(): SyncChunker = + SyncChunker(config: defaultChunkerConfig()) + +proc newSyncChunker*(config: ChunkerConfig): SyncChunker = + SyncChunker(config: config) + +proc chunkFile*(chunker: SyncChunker, filePath: string): BResult[SyncChunkIterator] = + if not fileExists(filePath): + return err(ioError("File not found: " & filePath)) + + var file: File + if not open(file, filePath, fmRead): + return err(ioError("Cannot open file: " & filePath)) + + let iter = SyncChunkIterator( + file: file, + chunkSize: chunker.config.chunkSize, + buffer: newSeq[byte](chunker.config.chunkSize), + index: 0, + finished: false + ) + return ok(iter) + +proc currentIndex*(iter: SyncChunkIterator): int {.inline.} = + 
iter.index + +proc isFinished*(iter: SyncChunkIterator): bool {.inline.} = + iter.finished + +proc nextBlock*(iter: SyncChunkIterator): Option[BResult[blk.Block]] = + if iter.finished: + return none(BResult[blk.Block]) + + try: + let bytesRead = iter.file.readBytes(iter.buffer, 0, iter.chunkSize) + + if bytesRead == 0: + iter.finished = true + return none(BResult[blk.Block]) + + let + data = iter.buffer[0 ..< bytesRead] + blockResult = blk.newBlock(data) + iter.index += 1 + return some(blockResult) + + except IOError as e: + iter.finished = true + return some(BResult[blk.Block].err(ioError(e.msg))) + +proc close*(iter: SyncChunkIterator) = + iter.file.close() + iter.finished = true + +proc chunkData*(data: openArray[byte], chunkSize: int = DefaultChunkSize): seq[BResult[blk.Block]] = + result = @[] + var offset = 0 + while offset < data.len: + let + endOffset = min(offset + chunkSize, data.len) + chunk = data[offset ..< endOffset] + result.add(blk.newBlock(@chunk)) + offset = endOffset diff --git a/blockstore/cid.nim b/blockstore/cid.nim new file mode 100644 index 0000000..056c4d9 --- /dev/null +++ b/blockstore/cid.nim @@ -0,0 +1,132 @@ +import std/hashes +import results +import libp2p/cid as libp2pCid +import libp2p/[multicodec, multihash] +import ./errors + +type + Cid* = libp2pCid.Cid + CidVersion* = libp2pCid.CidVersion + CidError* = libp2pCid.CidError + +const + CIDv0* = libp2pCid.CIDv0 + CIDv1* = libp2pCid.CIDv1 + + LogosStorageManifest* = multiCodec("logos-storage-manifest") + LogosStorageBlock* = multiCodec("logos-storage-block") + LogosStorageTree* = multiCodec("logos-storage-tree") + Sha256Code* = multiCodec("sha2-256") + Sha256DigestSize* = 32 + Base32Alphabet* = "abcdefghijklmnopqrstuvwxyz234567" + + Base32DecodeTable*: array[256, int8] = block: + var t: array[256, int8] + for i in 0..255: + t[i] = -1 + for i, c in Base32Alphabet: + t[ord(c)] = int8(i) + t[ord(c) - 32] = int8(i) # uppercase + t + +proc wrap*(code: MultiCodec, digest: openArray[byte]): BResult[MultiHash] = + let mhResult = MultiHash.init(code, digest) + if mhResult.isErr: + return err(multihashError("Failed to create multihash")) + ok(mhResult.get()) + +proc newCidV1*(codec: MultiCodec, mh: MultiHash): BResult[Cid] = + let cidResult = Cid.init(libp2pCid.CIDv1, codec, mh) + if cidResult.isErr: + return err(cidError("Failed to create CID: " & $cidResult.error)) + ok(cidResult.get()) + +proc toBytes*(c: Cid): seq[byte] = + c.data.buffer + +proc mhash*(c: Cid): Result[MultiHash, CidError] = + libp2pCid.mhash(c) + +proc cidFromBytes*(data: openArray[byte]): BResult[Cid] = + let cidResult = Cid.init(data) + if cidResult.isErr: + return err(cidError("Failed to parse CID: " & $cidResult.error)) + ok(cidResult.get()) + +proc base32Encode*(data: openArray[byte]): string = + if data.len == 0: + return "" + + result = "" + var + buffer: uint64 = 0 + bits = 0 + + for b in data: + buffer = (buffer shl 8) or b.uint64 + bits += 8 + while bits >= 5: + bits -= 5 + let idx = (buffer shr bits) and 0x1F + result.add(Base32Alphabet[idx.int]) + + if bits > 0: + let idx = (buffer shl (5 - bits)) and 0x1F + result.add(Base32Alphabet[idx.int]) + +proc base32Decode*(s: string): BResult[seq[byte]] = + if s.len == 0: + return ok(newSeq[byte]()) + + var + buffer: uint64 = 0 + bits = 0 + res: seq[byte] = @[] + + for c in s: + let idx = Base32DecodeTable[ord(c)] + if idx < 0: + return err(cidError("Invalid base32 character: " & $c)) + + buffer = (buffer shl 5) or idx.uint64 + bits += 5 + + if bits >= 8: + bits -= 8 + res.add(((buffer shr bits) 
and 0xFF).byte) + + ok(res) + +proc `$`*(c: Cid): string = + "b" & base32Encode(c.data.buffer) + +proc cidFromString*(s: string): BResult[Cid] = + if s.len < 2: + return err(cidError("CID string too short")) + + if s[0] == 'b': + let decoded = ?base32Decode(s[1 .. ^1]) + return cidFromBytes(decoded) + else: + let cidResult = Cid.init(s) + if cidResult.isErr: + return err(cidError("Failed to parse CID: " & $cidResult.error)) + ok(cidResult.get()) + +proc `<`*(a, b: Cid): bool = + let + aData = a.data.buffer + bData = b.data.buffer + minLen = min(aData.len, bData.len) + for i in 0 ..< minLen: + if aData[i] < bData[i]: return true + elif aData[i] > bData[i]: return false + aData.len < bData.len + +proc cmp*(a, b: Cid): int = + if a < b: -1 + elif b < a: 1 + else: 0 + +proc hash*(c: Cid): Hash {.inline.} = + hash(c.data.buffer) diff --git a/blockstore/contentids_exts.nim b/blockstore/contentids_exts.nim new file mode 100644 index 0000000..ab9fd5b --- /dev/null +++ b/blockstore/contentids_exts.nim @@ -0,0 +1,8 @@ +## LogosStorage content ID extensions for libp2p CID +import libp2p/multicodec + +const ContentIdsExts* = @[ + multiCodec("logos-storage-manifest"), + multiCodec("logos-storage-block"), + multiCodec("logos-storage-tree"), +] diff --git a/blockstore/dataset.nim b/blockstore/dataset.nim new file mode 100644 index 0000000..e2d5534 --- /dev/null +++ b/blockstore/dataset.nim @@ -0,0 +1,1275 @@ +import std/[times, algorithm, strutils, streams, options, sequtils, random, os] +import chronos +import chronos/timer as ctimer +import chronos/threadsync +import taskpools +import leveldbstatic as leveldb + +import ./errors +import ./cid +import ./blocks as blk +import ./merkle +import ./manifest +import ./repostore +import ./serialization as ser +import ./sharding +import ./ioutils +import ./blockmap +import ./chunker + +const + DatasetMetadataPrefix = "dataset_metadata:" + DatasetBlocksPrefix = "dataset_blocks:" + BlockmapsPrefix = "blockmaps:" + ManifestsPrefix = "manifests:" + MerkleBackendConfigKey = "blockstore_config:merkle_backend" + BlockBackendConfigKey = "blockstore_config:block_backend" + BlockmapBackendConfigKey = "blockstore_config:blockmap_backend" + + DeletionBatchSize = 100 + DefaultDeletionPoolSize = 2 + +let DefaultDeletionWorkerInterval* = ctimer.milliseconds(100) + +type + DeletionTask = object + path: string + cidStr: string + size: uint64 + + DeletionBatchResult = object + deletedCids: seq[string] + totalFreed: uint64 + count: int + + BlockBackend* = enum + bbSharded + bbPacked + + DatasetMetadata = object + treeCid: string + manifestCid: string + lastAccess: uint64 + size: uint64 + blockCount: int + chunkSize: uint32 + treeId: string + merkleBackend: uint8 + blockBackend: uint8 + blockmapBackend: uint8 + + BlockRef = object + blockCid: string + proof: MerkleProof + + BlockRefSimple = object + blockCid: string + + Dataset* = ref object + treeCid*: Cid + manifestCid*: Cid + blockCount*: int + chunkSize: uint32 + totalSize: uint64 + treeId: string + repo: RepoStore + db: LevelDb + blockmapBackend: BlockmapBackend + blockmapSeq: seq[byte] + blockmapFile: FileBlockmap + merkleBackend: MerkleBackend + merkleReader: MerkleReader + treesDir: string + blockBackend: BlockBackend + dataDir: string + dataFile: File + + PendingDeletionWorker* = ref object + store: DatasetStore + running: bool + task: Future[void] + interval: ctimer.Duration + + DatasetStore* = ref object + repo*: RepoStore + db: LevelDb + blockHashConfig*: BlockHashConfig + merkleBackend*: MerkleBackend + blockBackend*: 
BlockBackend + blockmapBackend*: BlockmapBackend + ioMode*: IOMode + syncBatchSize*: int + treesDir*: string + dataDir*: string + blockmapsDir*: string + deletionWorker: PendingDeletionWorker + deletionPool: Taskpool + ownsDeletionPool: bool + + DatasetBuilder* = ref object + chunkSize: uint32 + filename: Option[string] + mimetype: Option[string] + blockHashConfig: BlockHashConfig + merkleBuilder: MerkleTreeBuilder + blockCids: seq[Cid] + streamingBuilder: StreamingMerkleBuilder + merkleStorage: MerkleStorage + treeId: string + merkleBackend: MerkleBackend + blockIndex: int + totalSize: uint64 + store: DatasetStore + blockBackend: BlockBackend + blockmapBackend: BlockmapBackend + ioMode: IOMode + writeHandle: WriteHandle + + LruIterator* = ref object + datasets: seq[Cid] + index: int + +proc hasBlock(dataset: Dataset, index: int): bool {.inline.} = + case dataset.blockmapBackend + of bmLevelDb: + blockmapGet(dataset.blockmapSeq, index) + of bmFile: + dataset.blockmapFile.get(index.uint64) + +proc markBlock(dataset: Dataset, index: int, value: bool): BResult[void] {.inline.} = + case dataset.blockmapBackend + of bmLevelDb: + blockmapSet(dataset.blockmapSeq, index, value) + ok() + of bmFile: + if value: + ?dataset.blockmapFile.set(index.uint64) + else: + ?dataset.blockmapFile.clear(index.uint64) + ok() + +proc flushBlockmap(dataset: Dataset) = + case dataset.blockmapBackend + of bmLevelDb: + discard + of bmFile: + dataset.blockmapFile.flush() + +proc close*(dataset: Dataset) = + case dataset.blockmapBackend + of bmLevelDb: + discard + of bmFile: + if dataset.blockmapFile != nil: + dataset.blockmapFile.close() + if dataset.dataFile != nil: + dataset.dataFile.close() + if dataset.merkleReader != nil: + dataset.merkleReader.close() + +proc writeDatasetMetadata(s: Stream, m: DatasetMetadata) {.gcsafe.} = + ser.writeString(s, m.treeCid) + ser.writeString(s, m.manifestCid) + ser.writeUint64(s, m.lastAccess) + ser.writeUint64(s, m.size) + ser.writeUint64(s, m.blockCount.uint64) + ser.writeUint32(s, m.chunkSize) + ser.writeString(s, m.treeId) + ser.writeUint8(s, m.merkleBackend) + ser.writeUint8(s, m.blockBackend) + ser.writeUint8(s, m.blockmapBackend) + +proc readDatasetMetadata(s: Stream): BResult[DatasetMetadata] {.gcsafe.} = + var m: DatasetMetadata + m.treeCid = ?ser.readString(s) + m.manifestCid = ?ser.readString(s) + m.lastAccess = ser.readUint64(s) + m.size = ser.readUint64(s) + m.blockCount = ser.readUint64(s).int + m.chunkSize = ser.readUint32(s) + m.treeId = ?ser.readString(s) + m.merkleBackend = ser.readUint8(s) + m.blockBackend = ser.readUint8(s) + m.blockmapBackend = ser.readUint8(s) + ok(m) + +proc serializeDatasetMetadata(m: DatasetMetadata): BResult[seq[byte]] = + ser.toBytes(m, writeDatasetMetadata) + +proc deserializeDatasetMetadata(data: openArray[byte]): BResult[DatasetMetadata] = + ser.fromBytesResult(data, readDatasetMetadata) + +proc writeBlockRefSimple(s: Stream, r: BlockRefSimple) {.gcsafe.} = + ser.writeString(s, r.blockCid) + +proc readBlockRefSimple(s: Stream): BResult[BlockRefSimple] {.gcsafe.} = + var r: BlockRefSimple + r.blockCid = ?ser.readString(s) + ok(r) + +proc serializeBlockRefSimple(r: BlockRefSimple): BResult[seq[byte]] = + ser.toBytes(r, writeBlockRefSimple) + +proc deserializeBlockRefSimple(data: openArray[byte]): BResult[BlockRefSimple] = + ser.fromBytesResult(data, readBlockRefSimple) + +proc writeMerkleProofNode(s: Stream, node: MerkleProofNode) {.gcsafe.} = + s.write(node.hash) + ser.writeUint32(s, node.level.uint32) + +proc readMerkleProofNode(s: 
Stream): BResult[MerkleProofNode] {.gcsafe.} = + var node: MerkleProofNode + let bytesRead = s.readData(addr node.hash[0], HashSize) + if bytesRead != HashSize: + return err(ioError("Failed to read " & $HashSize & " bytes, got " & $bytesRead)) + node.level = ser.readUint32(s).int + ok(node) + +proc writeStreamingMerkleProof(s: Stream, proof: MerkleProof) {.gcsafe.} = + ser.writeUint64(s, proof.index) + ser.writeUint64(s, proof.path.len.uint64) + for node in proof.path: + writeMerkleProofNode(s, node) + ser.writeUint64(s, proof.leafCount) + +proc readStreamingMerkleProof(s: Stream): BResult[MerkleProof] {.gcsafe.} = + var proof: MerkleProof + proof.index = ser.readUint64(s) + let pathLen = ser.readUint64(s).int + proof.path = newSeq[MerkleProofNode](pathLen) + for i in 0 ..< pathLen: + proof.path[i] = ?readMerkleProofNode(s) + proof.leafCount = ser.readUint64(s) + ok(proof) + +proc writeBlockRef(s: Stream, r: BlockRef) {.gcsafe.} = + ser.writeString(s, r.blockCid) + writeStreamingMerkleProof(s, r.proof) + +proc readBlockRef(s: Stream): BResult[BlockRef] {.gcsafe.} = + var r: BlockRef + r.blockCid = ?ser.readString(s) + r.proof = ?readStreamingMerkleProof(s) + ok(r) + +proc serializeBlockRef(r: BlockRef): BResult[seq[byte]] = + ser.toBytes(r, writeBlockRef) + +proc deserializeBlockRef(data: openArray[byte]): BResult[BlockRef] = + ser.fromBytesResult(data, readBlockRef) + +proc datasetMetadataKey(treeCid: string): string {.inline.} = + DatasetMetadataPrefix & treeCid + +proc datasetBlockKey(treeId: string, index: int): string {.inline.} = + DatasetBlocksPrefix & treeId & ":" & align($index, 10, '0') + +proc blockmapKey(treeCid: string): string {.inline.} = + BlockmapsPrefix & treeCid + +proc manifestKey(manifestCid: string): string {.inline.} = + ManifestsPrefix & manifestCid + +proc generateTreeId(): string = + var r = initRand() + result = "" + for i in 0 ..< 16: + result.add(char(r.rand(25) + ord('a'))) + +proc finalizeTreeFile(store: DatasetStore, tempPath: string, treeCid: Cid): BResult[void] = + let finalPath = getShardedPath(store.treesDir, treeCid, ".tree") + ?atomicRename(tempPath, finalPath) + ok() + +proc finalizeDataFile(store: DatasetStore, tempPath: string, treeCid: Cid): BResult[void] = + let finalPath = getShardedPath(store.dataDir, treeCid, ".data") + ?atomicRename(tempPath, finalPath) + ok() + +proc finalizeBlockmapFile(store: DatasetStore, tempPath: string, treeCid: Cid): BResult[void] = + let finalPath = getBlockmapPath(store.blockmapsDir, treeCid) + ?atomicRename(tempPath, finalPath) + ok() + +proc deleteBatchWorker(tasks: ptr seq[DeletionTask], result: ptr DeletionBatchResult, + signal: ThreadSignalPtr) {.gcsafe.} = + result[].deletedCids = @[] + result[].totalFreed = 0 + result[].count = 0 + + for task in tasks[]: + if fileExists(task.path): + try: + removeFile(task.path) + result[].deletedCids.add(task.cidStr) + result[].totalFreed += task.size + result[].count += 1 + except OSError: + discard + else: + # TODO: cnanakos: file already gone??? 
still mark as deleted + result[].deletedCids.add(task.cidStr) + result[].count += 1 + + discard signal.fireSync() + +proc processPendingDeletions*(store: DatasetStore): Future[BResult[int]] {.async.} = + if store.deletionPool == nil: + return ok(0) + + let pendingResult = store.repo.getPendingDeletions(DeletionBatchSize) + if pendingResult.isErr: + return err(pendingResult.error) + + let pending = pendingResult.value + if pending.len == 0: + return ok(0) + + var locks: seq[(string, CidLock)] = @[] + for (cidStr, _) in pending: + let cl = await store.repo.acquireCidLock(cidStr) + locks.add((cidStr, cl)) + + proc releaseAllLocks() = + for (cidStr, cl) in locks: + store.repo.releaseCidLock(cl, cidStr) + + var + tasksPtr = cast[ptr seq[DeletionTask]](alloc0(sizeof(seq[DeletionTask]))) + resultPtr = cast[ptr DeletionBatchResult](alloc0(sizeof(DeletionBatchResult))) + + tasksPtr[] = @[] + for (cidStr, pd) in pending: + tasksPtr[].add(DeletionTask(path: pd.blockPath, cidStr: cidStr, size: pd.size)) + + let signalResult = ThreadSignalPtr.new() + if signalResult.isErr: + dealloc(tasksPtr) + dealloc(resultPtr) + releaseAllLocks() + return err(ioError("Failed to create thread signal")) + + let signal = signalResult.get() + + store.deletionPool.spawn deleteBatchWorker(tasksPtr, resultPtr, signal) + + try: + await signal.wait() + except AsyncError as e: + discard signal.close() + dealloc(tasksPtr) + dealloc(resultPtr) + releaseAllLocks() + return err(ioError("Async error waiting for deletion: " & e.msg)) + except CancelledError: + discard signal.close() + dealloc(tasksPtr) + dealloc(resultPtr) + releaseAllLocks() + return err(ioError("Deletion cancelled")) + + discard signal.close() + + let + totalFreed = resultPtr[].totalFreed + deletedCids = resultPtr[].deletedCids + count = resultPtr[].count + + dealloc(tasksPtr) + dealloc(resultPtr) + + releaseAllLocks() + + if totalFreed > 0: + store.repo.decreaseUsed(totalFreed) + + if deletedCids.len > 0: + ?store.repo.removePendingDeletionsBatch(deletedCids) + + return ok(count) + +proc deletionWorkerLoop(worker: PendingDeletionWorker) {.async.} = + while worker.running: + discard await worker.store.processPendingDeletions() + await sleepAsync(worker.interval) + +proc startDeletionWorker*(store: DatasetStore, interval: ctimer.Duration = DefaultDeletionWorkerInterval) = + if store.deletionWorker != nil and store.deletionWorker.running: + return + + store.deletionWorker = PendingDeletionWorker( + store: store, + running: true, + interval: interval + ) + store.deletionWorker.task = deletionWorkerLoop(store.deletionWorker) + +proc stopDeletionWorker*(store: DatasetStore) {.async.} = + if store.deletionWorker == nil or not store.deletionWorker.running: + return + + store.deletionWorker.running = false + if store.deletionWorker.task != nil: + try: + await store.deletionWorker.task + except CancelledError: + discard + store.deletionWorker = nil + +proc newDatasetStore*(dbPath: string, blocksDir: string, quota: uint64 = 0, + blockHashConfig: BlockHashConfig = defaultBlockHashConfig(), + merkleBackend: MerkleBackend = mbPacked, + blockBackend: BlockBackend = bbSharded, + blockmapBackend: BlockmapBackend = bmLevelDb, + ioMode: IOMode = ioDirect, + syncBatchSize: int = 0, + pool: Taskpool = nil): BResult[DatasetStore] = + var db: LevelDb + try: + db = leveldb.open(dbPath) + except LevelDbException as e: + return err(databaseError(e.msg)) + + try: + let existingConfig = db.get(MerkleBackendConfigKey) + if existingConfig.isSome: + let storedBackend = 
MerkleBackend(parseInt(existingConfig.get())) + if storedBackend != merkleBackend: + db.close() + return err(backendMismatchError( + "Repository merkle backend was " & $storedBackend & " but " & $merkleBackend & " was requested")) + else: + db.put(MerkleBackendConfigKey, $ord(merkleBackend)) + except LevelDbException as e: + db.close() + return err(databaseError(e.msg)) + except ValueError as e: + db.close() + return err(databaseError("Invalid merkle backend config: " & e.msg)) + + try: + let existingConfig = db.get(BlockBackendConfigKey) + if existingConfig.isSome: + let storedBackend = BlockBackend(parseInt(existingConfig.get())) + if storedBackend != blockBackend: + db.close() + return err(backendMismatchError( + "Repository block backend was " & $storedBackend & " but " & $blockBackend & " was requested")) + else: + db.put(BlockBackendConfigKey, $ord(blockBackend)) + except LevelDbException as e: + db.close() + return err(databaseError(e.msg)) + except ValueError as e: + db.close() + return err(databaseError("Invalid block backend config: " & e.msg)) + + try: + let existingConfig = db.get(BlockmapBackendConfigKey) + if existingConfig.isSome: + let storedBackend = BlockmapBackend(parseInt(existingConfig.get())) + if storedBackend != blockmapBackend: + db.close() + return err(backendMismatchError( + "Repository blockmap backend was " & $storedBackend & " but " & $blockmapBackend & " was requested")) + else: + db.put(BlockmapBackendConfigKey, $ord(blockmapBackend)) + except LevelDbException as e: + db.close() + return err(databaseError(e.msg)) + except ValueError as e: + db.close() + return err(databaseError("Invalid blockmap backend config: " & e.msg)) + + let + parentDir = parentDir(dbPath) + treesDir = parentDir / "trees" + dataDir = parentDir / "data" + blockmapsDir = parentDir / "blockmaps" + + if merkleBackend == mbPacked: + let shardResult = initShardDirectories(treesDir) + if shardResult.isErr: + db.close() + return err(shardResult.error) + cleanupTmpDir(treesDir) + + if blockBackend == bbPacked: + let shardResult = initShardDirectories(dataDir) + if shardResult.isErr: + db.close() + return err(shardResult.error) + cleanupTmpDir(dataDir) + + if blockmapBackend == bmFile: + let shardResult = initShardDirectories(blockmapsDir) + if shardResult.isErr: + db.close() + return err(shardResult.error) + cleanupTmpDir(blockmapsDir) + + let repo = ?newRepoStore(blocksDir, db, quota, ioMode, syncBatchSize) + + var + ownsDeletionPool = false + deletionPool: Taskpool + if pool == nil: + deletionPool = Taskpool.new(numThreads = DefaultDeletionPoolSize) + ownsDeletionPool = true + else: + deletionPool = pool + + var store = DatasetStore( + repo: repo, + db: db, + blockHashConfig: blockHashConfig, + merkleBackend: merkleBackend, + blockBackend: blockBackend, + blockmapBackend: blockmapBackend, + ioMode: ioMode, + syncBatchSize: syncBatchSize, + treesDir: treesDir, + dataDir: dataDir, + blockmapsDir: blockmapsDir, + deletionPool: deletionPool, + ownsDeletionPool: ownsDeletionPool + ) + + store.startDeletionWorker() + + ok(store) + +proc closeAsync*(store: DatasetStore) {.async.} = + await store.stopDeletionWorker() + # cnanakos: We intentionally don't call deletionPool.shutdown() here. + # Taskpools uses global static variables that can conflict with other + # taskpool instances (e.g., nimble). The pool threads will be cleaned + # up when the process exits afterall or the caller will handle the taskpools. 
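+  # Editor's sketch (illustrative only; the paths and quota below are
+  # hypothetical): callers are expected to drive shutdown through this proc
+  # or the blocking close() wrapper below, e.g.:
+  #
+  #   let store = newDatasetStore("/data/store/db", "/data/store/blocks",
+  #                               quota = 1_000_000_000'u64).expect("store")
+  #   # ... build and read datasets ...
+  #   await store.closeAsync()  # stops the deletion worker, then closes repo and db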
+ store.repo.close() + store.db.close() + +proc close*(store: DatasetStore) = + waitFor store.closeAsync() + +proc used*(store: DatasetStore): uint64 = + store.repo.used() + +proc quota*(store: DatasetStore): uint64 = + store.repo.quota() + +proc getRepo*(store: DatasetStore): RepoStore = + store.repo + +proc getManifest*(store: DatasetStore, manifestCid: Cid): Future[BResult[Option[Manifest]]] {.async.} = + try: + let + key = manifestKey($manifestCid) + valueOpt= store.db.get(key) + if valueOpt.isNone: + return ok(none(Manifest)) + + let manifest = ?decodeManifest(cast[seq[byte]](valueOpt.get)) + return ok(some(manifest)) + except LevelDbException as e: + return err(databaseError(e.msg)) + +proc getDataset*(store: DatasetStore, treeCid: Cid): Future[BResult[Option[Dataset]]] {.async.} = + try: + let + metaKey = datasetMetadataKey($treeCid) + metaValueOpt = store.db.get(metaKey) + if metaValueOpt.isNone: + return ok(none(Dataset)) + + let + meta = ?deserializeDatasetMetadata(cast[seq[byte]](metaValueOpt.get)) + manifestCid = ?cidFromString(meta.manifestCid) + merkleBackend = MerkleBackend(meta.merkleBackend) + blockBackend = BlockBackend(meta.blockBackend) + blockmapBackend = BlockmapBackend(meta.blockmapBackend) + + var + blockmapSeq: seq[byte] + blockmapFile: FileBlockmap = nil + + case blockmapBackend + of bmLevelDb: + let + bmKey = blockmapKey($treeCid) + bmValueOpt = store.db.get(bmKey) + if bmValueOpt.isSome and bmValueOpt.get().len > 0: + blockmapSeq = cast[seq[byte]](bmValueOpt.get) + else: + blockmapSeq = newBlockmap(meta.blockCount) + of bmFile: + let bmPath = getBlockmapPath(store.blockmapsDir, treeCid) + blockmapFile = ?newFileBlockmap(bmPath, forWriting = true) + + var merkleReader: MerkleReader = nil + case merkleBackend + of mbEmbeddedProofs: + discard + of mbLevelDb: + let storage = newLevelDbMerkleStorage(store.db, MerkleTreePrefix & meta.treeId) + merkleReader = newMerkleReader(storage) + of mbPacked: + let + treePath = getShardedPath(store.treesDir, treeCid, ".tree") + storage = ?newPackedMerkleStorage(treePath, forWriting = false) + merkleReader = newMerkleReader(storage) + + var dataFile: File = nil + case blockBackend + of bbSharded: + discard + of bbPacked: + let dataPath = getShardedPath(store.dataDir, treeCid, ".data") + dataFile = open(dataPath, fmRead) + + var dataset = Dataset( + treeCid: treeCid, + manifestCid: manifestCid, + blockCount: meta.blockCount, + chunkSize: meta.chunkSize, + totalSize: meta.size, + treeId: meta.treeId, + repo: store.repo, + db: store.db, + blockmapBackend: blockmapBackend, + blockmapSeq: blockmapSeq, + blockmapFile: blockmapFile, + merkleBackend: merkleBackend, + merkleReader: merkleReader, + treesDir: store.treesDir, + blockBackend: blockBackend, + dataDir: store.dataDir, + dataFile: dataFile + ) + + return ok(some(dataset)) + except LevelDbException as e: + return err(databaseError(e.msg)) + except IOError as e: + return err(ioError(e.msg)) + +proc createDataset*(store: DatasetStore, manifest: Manifest): Future[BResult[Dataset]] {.async.} = + let + manifestCid = ?manifest.toCid() + treeCid = ?cidFromBytes(manifest.treeCid) + blockCount = manifest.blocksCount() + + let existingOpt = ?await store.getDataset(treeCid) + if existingOpt.isSome: + return ok(existingOpt.get) + + let + now = epochTime().uint64 + treeId = generateTreeId() + + var + blockmapSeq: seq[byte] = @[] + blockmapFile: FileBlockmap = nil + + case store.blockmapBackend + of bmLevelDb: + blockmapSeq = newBlockmap(blockCount) + of bmFile: + let bmPath = 
getBlockmapPath(store.blockmapsDir, treeCid) + blockmapFile = ?newFileBlockmap(bmPath, forWriting = true) + + let meta = DatasetMetadata( + treeCid: $treeCid, + manifestCid: $manifestCid, + lastAccess: now, + size: manifest.datasetSize, + blockCount: blockCount, + chunkSize: manifest.blockSize, + treeId: treeId, + merkleBackend: uint8(store.merkleBackend), + blockBackend: uint8(store.blockBackend), + blockmapBackend: uint8(store.blockmapBackend) + ) + + let + manifestBytes = ?encodeManifest(manifest) + metaBytes = ?serializeDatasetMetadata(meta) + + try: + let batch = newBatch() + batch.put(datasetMetadataKey($treeCid), cast[string](metaBytes)) + if store.blockmapBackend == bmLevelDb: + batch.put(blockmapKey($treeCid), cast[string](blockmapSeq)) + batch.put(manifestKey($manifestCid), cast[string](manifestBytes)) + store.db.write(batch) + + var dataset = Dataset( + treeCid: treeCid, + manifestCid: manifestCid, + blockCount: blockCount, + chunkSize: manifest.blockSize, + totalSize: manifest.datasetSize, + treeId: treeId, + repo: store.repo, + db: store.db, + blockmapBackend: store.blockmapBackend, + blockmapSeq: blockmapSeq, + blockmapFile: blockmapFile, + merkleBackend: store.merkleBackend, + blockBackend: store.blockBackend + ) + + return ok(dataset) + except LevelDbException as e: + return err(databaseError(e.msg)) + +proc deleteDataset*(store: DatasetStore, manifestCid: Cid): Future[BResult[void]] {.async.} = + let manifestOpt = ?await store.getManifest(manifestCid) + if manifestOpt.isNone: + return err(datasetNotFoundError()) + + let + manifest = manifestOpt.get + treeCid = ?cidFromBytes(manifest.treeCid) + treeCidStr = $treeCid + + var treeId: string + try: + let + metaKey = datasetMetadataKey(treeCidStr) + metaValueOpt = store.db.get(metaKey) + if metaValueOpt.isSome: + let meta = ?deserializeDatasetMetadata(cast[seq[byte]](metaValueOpt.get)) + treeId = meta.treeId + else: + treeId = "" + except LevelDbException as e: + return err(databaseError(e.msg)) + + try: + let batch = newBatch() + batch.delete(manifestKey($manifestCid)) + batch.delete(datasetMetadataKey(treeCidStr)) + batch.delete(blockmapKey(treeCidStr)) + store.db.write(batch) + except LevelDbException as e: + return err(databaseError(e.msg)) + + if treeId.len > 0: + const batchSize = 1000 + let prefix = DatasetBlocksPrefix & treeId & ":" + + while true: + var batch: seq[(string, Cid)] = @[] + try: + for key, value in store.db.iter(): + if not key.startsWith(prefix): + if key > prefix: + break + continue + + let blockRefSimpleResult = deserializeBlockRefSimple(cast[seq[byte]](value)) + if blockRefSimpleResult.isOk: + let blockCidResult = cidFromString(blockRefSimpleResult.value.blockCid) + if blockCidResult.isOk: + batch.add((key, blockCidResult.value)) + if batch.len >= batchSize: + break + else: + let blockRefResult = deserializeBlockRef(cast[seq[byte]](value)) + if blockRefResult.isOk: + let blockCidResult = cidFromString(blockRefResult.value.blockCid) + if blockCidResult.isOk: + batch.add((key, blockCidResult.value)) + if batch.len >= batchSize: + break + except LevelDbException as e: + return err(databaseError(e.msg)) + + if batch.len == 0: + break + + if store.blockBackend == bbSharded: + for (_, c) in batch: + discard store.repo.releaseBlock(c) + + try: + let dbBatch = newBatch() + for (key, _) in batch: + dbBatch.delete(key) + store.db.write(dbBatch) + except LevelDbException as e: + return err(databaseError(e.msg)) + + if store.merkleBackend == mbPacked: + let treePath = getShardedPathStr(store.treesDir, treeCidStr, 
".tree") + if fileExists(treePath): + removeFile(treePath) + + if store.blockBackend == bbPacked: + let dataPath = getShardedPathStr(store.dataDir, treeCidStr, ".data") + if fileExists(dataPath): + removeFile(dataPath) + + if store.blockmapBackend == bmFile: + let bmPath = getBlockmapPathStr(store.blockmapsDir, treeCidStr) + if fileExists(bmPath): + removeFile(bmPath) + + return ok() + +proc startDataset*(store: DatasetStore, chunkSize: uint32, filename: Option[string] = none(string)): BResult[DatasetBuilder] = + ?validateChunkSize(chunkSize) + + let treeId = generateTreeId() + + var builder = DatasetBuilder( + chunkSize: chunkSize, + filename: filename, + mimetype: none(string), + blockHashConfig: store.blockHashConfig, + merkleBackend: store.merkleBackend, + blockBackend: store.blockBackend, + blockmapBackend: store.blockmapBackend, + ioMode: store.ioMode, + treeId: treeId, + blockIndex: 0, + blockCids: @[], + totalSize: 0, + store: store + ) + + case store.merkleBackend + of mbEmbeddedProofs: + builder.merkleBuilder = newMerkleTreeBuilder() + of mbLevelDb: + builder.merkleStorage = newLevelDbMerkleStorage(store.db, MerkleTreePrefix & treeId) + builder.streamingBuilder = newStreamingMerkleBuilder(builder.merkleStorage) + of mbPacked: + let treePath = getTmpPath(store.treesDir, treeId, ".tree") + builder.merkleStorage = ?newPackedMerkleStorage(treePath, forWriting = true) + builder.streamingBuilder = newStreamingMerkleBuilder(builder.merkleStorage) + + case store.blockBackend + of bbSharded: + discard + of bbPacked: + let + dataPath = getTmpPath(store.dataDir, treeId, ".data") + handleResult = ioutils.openForWrite(dataPath, store.ioMode, chunkSize.int, syncNone()) + if handleResult.isErr: + return err(handleResult.error) + builder.writeHandle = handleResult.value + + ok(builder) + +proc lru*(store: DatasetStore): BResult[LruIterator] = + var datasets: seq[(Cid, uint64)] = @[] + + try: + for key, value in store.db.iter(): + if not key.startsWith(DatasetMetadataPrefix): + continue + + let metaResult = deserializeDatasetMetadata(cast[seq[byte]](value)) + if metaResult.isOk: + let + cidStr = key[DatasetMetadataPrefix.len .. 
^1] + cidResult = cidFromString(cidStr) + if cidResult.isOk: + datasets.add((cidResult.value, metaResult.value.lastAccess)) + + datasets.sort(proc(a, b: (Cid, uint64)): int = cmp(a[1], b[1])) + + ok(LruIterator( + datasets: datasets.mapIt(it[0]), + index: 0 + )) + except LevelDbException as e: + err(databaseError(e.msg)) + +proc filterPresent*(dataset: Dataset, indices: openArray[int]): seq[int] = + result = @[] + for i in indices: + if dataset.hasBlock(i): + result.add(i) + +proc getBlockmapRanges*(dataset: Dataset): seq[BlockRange] = + case dataset.blockmapBackend + of bmLevelDb: + result = @[] + var inRange = false + var rangeStart: uint64 = 0 + for i in 0 ..< dataset.blockCount: + let present = blockmapGet(dataset.blockmapSeq, i) + if present and not inRange: + rangeStart = i.uint64 + inRange = true + elif not present and inRange: + result.add(BlockRange(start: rangeStart, count: i.uint64 - rangeStart)) + inRange = false + if inRange: + result.add(BlockRange(start: rangeStart, count: dataset.blockCount.uint64 - rangeStart)) + of bmFile: + result = dataset.blockmapFile.toRanges() + +proc completed*(dataset: Dataset): int = + case dataset.blockmapBackend + of bmLevelDb: + blockmapCountOnes(dataset.blockmapSeq) + of bmFile: + dataset.blockmapFile.countOnes().int + +proc touch(dataset: Dataset): BResult[void] = + let now = epochTime().uint64 + + try: + let key = datasetMetadataKey($dataset.treeCid) + let valueOpt = dataset.db.get(key) + if valueOpt.isNone: + return ok() + + var meta = ?deserializeDatasetMetadata(cast[seq[byte]](valueOpt.get)) + meta.lastAccess = now + let metaBytes = ?serializeDatasetMetadata(meta) + dataset.db.put(key, cast[string](metaBytes)) + + ok() + except LevelDbException as e: + err(databaseError(e.msg)) + +proc saveBlockmap(dataset: Dataset): BResult[void] = + case dataset.blockmapBackend + of bmLevelDb: + try: + dataset.db.put(blockmapKey($dataset.treeCid), cast[string](dataset.blockmapSeq)) + ok() + except LevelDbException as e: + err(databaseError(e.msg)) + of bmFile: + dataset.flushBlockmap() + ok() + +proc putBlock*(dataset: Dataset, b: blk.Block, index: int, proof: MerkleProof): Future[BResult[void]] {.async.} = + if index < 0 or index >= dataset.blockCount: + return err(invalidBlockError()) + + if dataset.hasBlock(index): + return ok() + + if proof.leafCount != uint64(dataset.blockCount): + return err(invalidProofError()) + + discard ?await dataset.repo.putBlock(b) + + let key = datasetBlockKey(dataset.treeId, index) + + var blockRefBytes: seq[byte] + case dataset.merkleBackend + of mbEmbeddedProofs: + let blockRef = BlockRef(blockCid: $b.cid, proof: proof) + blockRefBytes = ?serializeBlockRef(blockRef) + of mbLevelDb, mbPacked: + let blockRef = BlockRefSimple(blockCid: $b.cid) + blockRefBytes = ?serializeBlockRefSimple(blockRef) + + try: + dataset.db.put(key, cast[string](blockRefBytes)) + + ?dataset.markBlock(index, true) + + ?dataset.saveBlockmap() + + return ok() + except LevelDbException as e: + return err(databaseError(e.msg)) + +proc getBlock*(dataset: Dataset, index: int): Future[BResult[Option[(blk.Block, MerkleProof)]]] {.async.} = + if index < 0 or index >= dataset.blockCount: + return err(invalidBlockError()) + + if not dataset.hasBlock(index): + return ok(none((blk.Block, MerkleProof))) + + try: + let + key = datasetBlockKey(dataset.treeId, index) + valueOpt = dataset.db.get(key) + if valueOpt.isNone: + return ok(none((blk.Block, MerkleProof))) + + var + blockCid: Cid + proof: MerkleProof + + case dataset.merkleBackend + of mbEmbeddedProofs: + 
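+      # With embedded proofs the Merkle proof travels inline with the block
+      # reference, so it can be returned directly; the mbLevelDb/mbPacked
+      # branch below stores only the block CID and asks the MerkleReader for
+      # the proof instead.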
let blockRef = ?deserializeBlockRef(cast[seq[byte]](valueOpt.get)) + blockCid = ?cidFromString(blockRef.blockCid) + proof = blockRef.proof + of mbLevelDb, mbPacked: + let blockRef = ?deserializeBlockRefSimple(cast[seq[byte]](valueOpt.get)) + blockCid = ?cidFromString(blockRef.blockCid) + if dataset.merkleReader.isNil: + return err(merkleTreeError("Merkle reader not initialized")) + try: + proof = ?dataset.merkleReader.getProof(uint64(index)) + except CatchableError as e: + return err(merkleTreeError("Failed to get proof: " & e.msg)) + except Exception as e: + return err(merkleTreeError("Failed to get proof: " & e.msg)) + + var b: blk.Block + case dataset.blockBackend + of bbSharded: + let blockOpt = ?await dataset.repo.getBlock(blockCid) + if blockOpt.isNone: + return ok(none((blk.Block, MerkleProof))) + b = blockOpt.get + of bbPacked: + let + offset = int64(index) * int64(dataset.chunkSize) + isLastBlock = (index == dataset.blockCount - 1) + let blockSize = if isLastBlock: + let remainder = dataset.totalSize mod dataset.chunkSize.uint64 + if remainder == 0: dataset.chunkSize.int else: remainder.int + else: + dataset.chunkSize.int + + var data = newSeq[byte](blockSize) + dataset.dataFile.setFilePos(offset) + let bytesRead = dataset.dataFile.readBytes(data, 0, blockSize) + if bytesRead != blockSize: + return err(ioError("Failed to read complete block from packed file")) + + b = blk.fromCidUnchecked(blockCid, data) + + return ok(some((b, proof))) + except LevelDbException as e: + return err(databaseError(e.msg)) + except IOError as e: + return err(ioError("Failed to read block: " & e.msg)) + except CatchableError as e: + return err(merkleTreeError("Failed to get proof: " & e.msg)) + +proc setFilename*(builder: DatasetBuilder, filename: string) = + builder.filename = some(filename) + +proc setMimetype*(builder: DatasetBuilder, mimetype: string) = + builder.mimetype = some(mimetype) + +proc newChunker*(builder: DatasetBuilder, pool: Taskpool): BResult[AsyncChunker] = + if builder.filename.isSome: + return err(invalidOperationError("Cannot use newChunker when filename is set. Use chunkFile instead.")) + let config = newChunkerConfig(builder.chunkSize.int) + ok(newAsyncChunker(pool, config)) + +proc chunkFile*(builder: DatasetBuilder, pool: Taskpool): Future[BResult[AsyncChunkStream]] {.async.} = + if builder.filename.isNone: + return err(invalidOperationError("Cannot use chunkFile without filename. 
Use newChunker instead.")) + let config = newChunkerConfig(builder.chunkSize.int) + let chunker = newAsyncChunker(pool, config) + return await chunker.chunkFile(builder.filename.get()) + +proc addBlock*(builder: DatasetBuilder, b: blk.Block): Future[BResult[int]] {.async.} = + let + blockSize = b.data.len.uint64 + index = builder.blockIndex + + case builder.merkleBackend + of mbEmbeddedProofs: + builder.merkleBuilder.addBlock(b.data) + builder.blockCids.add(b.cid) + of mbLevelDb, mbPacked: + let leafHash = builder.blockHashConfig.hashFunc(b.data) + ?builder.streamingBuilder.addLeaf(leafHash) + + let + key = datasetBlockKey(builder.treeId, index) + blockRef = BlockRefSimple(blockCid: $b.cid) + blockRefBytes = ?serializeBlockRefSimple(blockRef) + try: + builder.store.db.put(key, cast[string](blockRefBytes)) + except LevelDbException as e: + return err(databaseError("Failed to write block ref: " & e.msg)) + + case builder.blockBackend + of bbSharded: + discard ?await builder.store.repo.putBlock(b) + of bbPacked: + let writeResult = builder.writeHandle.writeBlock(b.data) + if writeResult.isErr: + return err(writeResult.error) + + builder.blockIndex += 1 + builder.totalSize += blockSize + + return ok(index) + +proc finalize*(builder: DatasetBuilder): Future[BResult[Dataset]] {.async.} = + let blockCount = builder.blockIndex + + if blockCount == 0: + return err(invalidBlockError()) + + var treeCid: Cid + case builder.merkleBackend + of mbEmbeddedProofs: + builder.merkleBuilder.buildTree() + treeCid = ?builder.merkleBuilder.rootCid() + of mbLevelDb, mbPacked: + try: + let rootHash = ?builder.streamingBuilder.finalize() + treeCid = ?rootToCid(rootHash, builder.blockHashConfig.hashCode, builder.blockHashConfig.treeCodec) + except CatchableError as e: + return err(merkleTreeError("Failed to finalize: " & e.msg)) + except Exception as e: + return err(merkleTreeError("Failed to finalize: " & e.msg)) + + var merkleReader: MerkleReader = nil + if builder.merkleBackend == mbPacked: + ?builder.merkleStorage.close() + let tempTreePath = getTmpPath(builder.store.treesDir, builder.treeId, ".tree") + ?builder.store.finalizeTreeFile(tempTreePath, treeCid) + let + finalTreePath = getShardedPath(builder.store.treesDir, treeCid, ".tree") + storage = ?newPackedMerkleStorage(finalTreePath, forWriting = false) + merkleReader = newMerkleReader(storage) + elif builder.merkleBackend == mbLevelDb: + merkleReader = newMerkleReader(builder.merkleStorage) + + var dataFile: File = nil + if builder.blockBackend == bbPacked: + let finalizeResult = builder.writeHandle.finalize(builder.totalSize.int64) + if finalizeResult.isErr: + return err(finalizeResult.error) + builder.writeHandle.close() + + let tempDataPath = getTmpPath(builder.store.dataDir, builder.treeId, ".data") + ?builder.store.finalizeDataFile(tempDataPath, treeCid) + let finalDataPath = getShardedPath(builder.store.dataDir, treeCid, ".data") + try: + dataFile = open(finalDataPath, fmRead) + except IOError as e: + return err(ioError("Failed to open data file for reading: " & e.msg)) + + let manifest = newManifest( + treeCid.toBytes(), + builder.chunkSize, + builder.totalSize, + builder.filename, + builder.mimetype + ) + + let + manifestCid = ?manifest.toCid() + now = epochTime().uint64 + + #TODO cnanakos: maybe use a variant Dataset object for these? 
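+  # Editor's sketch (illustrative only; the file name and taskpool `tp` are
+  # assumed to exist): the builder flow that ends in this proc typically
+  # looks like:
+  #
+  #   let builder = store.startDataset(chunkSize = 65536,
+  #                                    filename = some("input.bin")).expect("builder")
+  #   let stream = (await builder.chunkFile(tp)).expect("chunk stream")
+  #   # for each chunk: wrap it in a blk.Block and await builder.addBlock(b)
+  #   let dataset = (await builder.finalize()).expect("dataset")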
+ var + blockmapSeq: seq[byte] = @[] + blockmapFile: FileBlockmap = nil + tempBmPath: string = "" + + case builder.blockmapBackend + of bmLevelDb: + blockmapSeq = newBlockmap(blockCount) + of bmFile: + tempBmPath = getTmpPath(builder.store.blockmapsDir, builder.treeId, ".blkmap") + blockmapFile = ?newFileBlockmap(tempBmPath, forWriting = true) + + let meta = DatasetMetadata( + treeCid: $treeCid, + manifestCid: $manifestCid, + lastAccess: now, + size: builder.totalSize, + blockCount: blockCount, + chunkSize: builder.chunkSize, + treeId: builder.treeId, + merkleBackend: uint8(builder.merkleBackend), + blockBackend: uint8(builder.blockBackend), + blockmapBackend: uint8(builder.blockmapBackend) + ) + + let + manifestBytes = ?encodeManifest(manifest) + metaBytes = ?serializeDatasetMetadata(meta) + + try: + let batch = newBatch() + batch.put(datasetMetadataKey($treeCid), cast[string](metaBytes)) + batch.put(manifestKey($manifestCid), cast[string](manifestBytes)) + builder.store.db.write(batch) + + case builder.merkleBackend + of mbEmbeddedProofs: + const batchSize = 1024 + for chunkStart in countup(0, blockCount - 1, batchSize): + let + chunkEnd = min(chunkStart + batchSize, blockCount) + dbBatch = newBatch() + + for index in chunkStart ..< chunkEnd: + let + proof = ?builder.merkleBuilder.getProof(index) + blockRef = BlockRef( + blockCid: $builder.blockCids[index], + proof: proof + ) + blockRefBytes = ?serializeBlockRef(blockRef) + dbBatch.put(datasetBlockKey(builder.treeId, index), cast[string](blockRefBytes)) + + builder.store.db.write(dbBatch) + of mbLevelDb, mbPacked: + discard + + case builder.blockmapBackend + of bmLevelDb: + for index in 0 ..< blockCount: + blockmapSet(blockmapSeq, index, true) + of bmFile: + ?blockmapFile.setAll(blockCount.uint64) + + case builder.blockmapBackend + of bmLevelDb: + builder.store.db.put(blockmapKey($treeCid), cast[string](blockmapSeq)) + of bmFile: + ?blockmapFile.finalize(blockCount.uint64) + blockmapFile.close() + ?builder.store.finalizeBlockmapFile(tempBmPath, treeCid) + let finalBmPath = getBlockmapPath(builder.store.blockmapsDir, treeCid) + blockmapFile = ?newFileBlockmap(finalBmPath, forWriting = false) + + var dataset = Dataset( + treeCid: treeCid, + manifestCid: manifestCid, + blockCount: blockCount, + chunkSize: builder.chunkSize, + totalSize: builder.totalSize, + treeId: builder.treeId, + repo: builder.store.repo, + db: builder.store.db, + blockmapBackend: builder.blockmapBackend, + blockmapSeq: blockmapSeq, + blockmapFile: blockmapFile, + merkleBackend: builder.merkleBackend, + merkleReader: merkleReader, + treesDir: builder.store.treesDir, + blockBackend: builder.blockBackend, + dataDir: builder.store.dataDir, + dataFile: dataFile + ) + + return ok(dataset) + except LevelDbException as e: + return err(databaseError(e.msg)) + +proc next*(iter: LruIterator): Option[Cid] = + if iter.index < iter.datasets.len: + result = some(iter.datasets[iter.index]) + inc iter.index + else: + result = none(Cid) + +iterator items*(iter: LruIterator): Cid = + for c in iter.datasets: + yield c diff --git a/blockstore/directio.nim b/blockstore/directio.nim new file mode 100644 index 0000000..c36bb6b --- /dev/null +++ b/blockstore/directio.nim @@ -0,0 +1,179 @@ +import std/os +import results +import ./errors + +when defined(posix): + import std/posix + proc c_free(p: pointer) {.importc: "free", header: "".} + +when defined(linux): + const O_DIRECT* = cint(0o40000) + +when defined(macosx): + const F_NOCACHE* = cint(48) + +const + PageSize* = 4096 + MinChunkSize* = 
PageSize + +type + AlignedBuffer* = object + data*: ptr UncheckedArray[byte] + size*: int + capacity*: int + + DirectFile* = ref object + fd: cint + path: string + offset: int64 + +proc isPowerOfTwo*(x: uint32): bool {.inline.} = + x > 0 and (x and (x - 1)) == 0 + +proc alignUp*(size: int, alignment: int = PageSize): int {.inline.} = + (size + alignment - 1) and not (alignment - 1) + +proc newAlignedBuffer*(size: int): AlignedBuffer = + let alignedSize = alignUp(size) + when defined(posix): + var p: pointer + let rc = posix_memalign(addr p, PageSize.csize_t, alignedSize.csize_t) + if rc != 0: + raise newException(OutOfMemDefect, "Failed to allocate aligned memory") + result.data = cast[ptr UncheckedArray[byte]](p) + else: + let + raw = alloc0(alignedSize + PageSize) + aligned = (cast[int](raw) + PageSize - 1) and not (PageSize - 1) + result.data = cast[ptr UncheckedArray[byte]](aligned) + + result.size = 0 + result.capacity = alignedSize + zeroMem(result.data, alignedSize) + +proc free*(buf: var AlignedBuffer) = + if buf.data != nil: + when defined(posix): + c_free(buf.data) + else: + dealloc(buf.data) + buf.data = nil + buf.size = 0 + buf.capacity = 0 + +proc copyFrom*(buf: var AlignedBuffer, data: openArray[byte]) = + if data.len > buf.capacity: + raise newException(ValueError, "Data exceeds buffer capacity") + + if data.len > 0: + copyMem(buf.data, unsafeAddr data[0], data.len) + + if data.len < buf.capacity: + zeroMem(addr buf.data[data.len], buf.capacity - data.len) + + buf.size = data.len + +proc clear*(buf: var AlignedBuffer) = + zeroMem(buf.data, buf.capacity) + buf.size = 0 + +proc openForWrite*(path: string): BResult[DirectFile] = + when defined(linux): + let + flags = O_WRONLY or O_CREAT or O_TRUNC or O_DIRECT + fd = posix.open(path.cstring, flags, 0o644) + if fd < 0: + return err(ioError("Failed to open file for direct I/O: " & path & " (errno: " & $errno & ")")) + ok(DirectFile(fd: fd, path: path, offset: 0)) + + elif defined(macosx): + let + flags = O_WRONLY or O_CREAT or O_TRUNC + fd = posix.open(path.cstring, flags, 0o644) + if fd < 0: + return err(ioError("Failed to open file: " & path)) + if fcntl(fd, F_NOCACHE, 1) < 0: + discard posix.close(fd) + return err(ioError("Failed to set F_NOCACHE: " & path)) + ok(DirectFile(fd: fd, path: path, offset: 0)) + + elif defined(posix): + err(ioError("Direct I/O not supported on this platform")) + + else: + err(ioError("Direct I/O not supported on this platform")) + +proc writeAligned*(f: DirectFile, buf: AlignedBuffer): BResult[int] = + when defined(posix): + let + toWrite = buf.capacity + written = posix.write(f.fd, cast[pointer](buf.data), toWrite) + if written < 0: + return err(ioError("Direct write failed (errno: " & $errno & ")")) + if written != toWrite: + return err(ioError("Incomplete direct write: " & $written & "/" & $toWrite)) + f.offset += written + ok(written.int) + else: + err(ioError("Direct I/O not supported")) + +proc truncateFile*(f: DirectFile, size: int64): BResult[void] = + when defined(posix): + if ftruncate(f.fd, size.Off) < 0: + return err(ioError("Failed to truncate file (errno: " & $errno & ")")) + ok() + else: + err(ioError("Truncate not supported")) + +proc currentOffset*(f: DirectFile): int64 {.inline.} = + f.offset + +proc close*(f: DirectFile) = + if f != nil and f.fd >= 0: + when defined(posix): + discard posix.close(f.fd) + f.fd = -1 + +proc sync*(f: DirectFile): BResult[void] = + when defined(posix): + if fsync(f.fd) < 0: + return err(ioError("Failed to sync file")) + ok() + else: + ok() + +proc 
writeBlockDirect*(path: string, data: openArray[byte]): BResult[void] = + let parentPath = parentDir(path) + if parentPath.len > 0: + try: + createDir(parentPath) + except OSError as e: + return err(ioError("Failed to create directory: " & e.msg)) + + let fileResult = openForWrite(path) + if fileResult.isErr: + return err(fileResult.error) + + let f = fileResult.value + defer: f.close() + + let alignedSize = alignUp(data.len) + var buf = newAlignedBuffer(alignedSize) + defer: buf.free() + + buf.copyFrom(data) + + let writeResult = f.writeAligned(buf) + if writeResult.isErr: + return err(writeResult.error) + + let truncResult = f.truncateFile(data.len.int64) + if truncResult.isErr: + return err(truncResult.error) + + when defined(macosx): + let syncResult = f.sync() + if syncResult.isErr: + return err(syncResult.error) + + ok() diff --git a/blockstore/errors.nim b/blockstore/errors.nim new file mode 100644 index 0000000..e467b5b --- /dev/null +++ b/blockstore/errors.nim @@ -0,0 +1,90 @@ +import std/strformat +import results + +type + BlockstoreErrorKind* = enum + IoError = "IO error" + SerializationError = "Serialization error" + DeserializationError = "Deserialization error" + CidError = "CID error" + MultihashError = "Multihash error" + DatabaseError = "Database error" + InvalidBlock = "Invalid block data" + BlockNotFound = "Block not found" + MerkleTreeError = "Merkle tree error" + DatasetNotFound = "Dataset not found" + QuotaExceeded = "Quota exceeded" + InvalidProof = "Invalid merkle proof" + InvalidProofHashLength = "Invalid merkle proof hash length" + ManifestEncodingError = "Manifest encoding error" + ManifestDecodingError = "Manifest decoding error" + BackendMismatch = "Backend mismatch" + InvalidOperation = "Invalid operation" + + BlockstoreError* = object + kind*: BlockstoreErrorKind + msg*: string + +type + BlockstoreResult*[T] = Result[T, BlockstoreError] + BResult*[T] = BlockstoreResult[T] + +proc newBlockstoreError*(kind: BlockstoreErrorKind, msg: string = ""): BlockstoreError = + BlockstoreError(kind: kind, msg: msg) + +proc ioError*(msg: string): BlockstoreError = + newBlockstoreError(IoError, msg) + +proc serializationError*(msg: string): BlockstoreError = + newBlockstoreError(SerializationError, msg) + +proc deserializationError*(msg: string): BlockstoreError = + newBlockstoreError(DeserializationError, msg) + +proc cidError*(msg: string): BlockstoreError = + newBlockstoreError(CidError, msg) + +proc multihashError*(msg: string): BlockstoreError = + newBlockstoreError(MultihashError, msg) + +proc databaseError*(msg: string): BlockstoreError = + newBlockstoreError(DatabaseError, msg) + +proc invalidBlockError*(): BlockstoreError = + newBlockstoreError(InvalidBlock) + +proc blockNotFoundError*(cid: string): BlockstoreError = + newBlockstoreError(BlockNotFound, cid) + +proc merkleTreeError*(msg: string): BlockstoreError = + newBlockstoreError(MerkleTreeError, msg) + +proc datasetNotFoundError*(): BlockstoreError = + newBlockstoreError(DatasetNotFound) + +proc quotaExceededError*(): BlockstoreError = + newBlockstoreError(QuotaExceeded) + +proc invalidProofError*(): BlockstoreError = + newBlockstoreError(InvalidProof) + +proc invalidProofHashLengthError*(): BlockstoreError = + newBlockstoreError(InvalidProofHashLength) + +proc manifestEncodingError*(msg: string): BlockstoreError = + newBlockstoreError(ManifestEncodingError, msg) + +proc manifestDecodingError*(msg: string): BlockstoreError = + newBlockstoreError(ManifestDecodingError, msg) + +proc backendMismatchError*(msg: 
string): BlockstoreError = + newBlockstoreError(BackendMismatch, msg) + +proc invalidOperationError*(msg: string): BlockstoreError = + newBlockstoreError(InvalidOperation, msg) + +proc `$`*(e: BlockstoreError): string = + if e.msg.len > 0: + fmt"{e.kind}: {e.msg}" + else: + $e.kind diff --git a/blockstore/ioutils.nim b/blockstore/ioutils.nim new file mode 100644 index 0000000..4a06573 --- /dev/null +++ b/blockstore/ioutils.nim @@ -0,0 +1,287 @@ +import std/os +import results +import ./errors +import ./directio + +when defined(posix): + import std/posix + +export PageSize, MinChunkSize, isPowerOfTwo, alignUp +export AlignedBuffer, newAlignedBuffer, free, copyFrom, clear + +type + IOMode* = enum + ioDirect + ioBuffered + + SyncPolicyKind* = enum + spNone + spEveryWrite + spEveryN + + SyncPolicy* = object + case kind*: SyncPolicyKind + of spNone: discard + of spEveryWrite: discard + of spEveryN: n*: int + + WriteHandle* = ref object + case mode*: IOMode + of ioDirect: + directFile: DirectFile + alignedBuf: AlignedBuffer + of ioBuffered: + file: File + path: string + offset: int64 + chunkSize: int + syncPolicy: SyncPolicy + writeCount: int + +proc syncNone*(): SyncPolicy = + SyncPolicy(kind: spNone) + +proc syncEveryWrite*(): SyncPolicy = + SyncPolicy(kind: spEveryWrite) + +proc syncEveryN*(n: int): SyncPolicy = + SyncPolicy(kind: spEveryN, n: n) + +proc openForWrite*(path: string, mode: IOMode, chunkSize: int, + syncPolicy: SyncPolicy = syncNone()): BResult[WriteHandle] = + let parentPath = parentDir(path) + if parentPath.len > 0: + try: + createDir(parentPath) + except OSError as e: + return err(ioError("Failed to create directory: " & e.msg)) + + case mode + of ioDirect: + let dfResult = directio.openForWrite(path) + if dfResult.isErr: + return err(dfResult.error) + + let alignedSize = alignUp(chunkSize) + var buf = newAlignedBuffer(alignedSize) + + ok(WriteHandle( + mode: ioDirect, + directFile: dfResult.value, + alignedBuf: buf, + path: path, + offset: 0, + chunkSize: chunkSize, + syncPolicy: syncPolicy, + writeCount: 0 + )) + + of ioBuffered: + try: + let f = open(path, fmWrite) + ok(WriteHandle( + mode: ioBuffered, + file: f, + path: path, + offset: 0, + chunkSize: chunkSize, + syncPolicy: syncPolicy, + writeCount: 0 + )) + except IOError as e: + err(ioError("Failed to open file: " & e.msg)) + +proc shouldSync(h: WriteHandle): bool {.inline.} = + case h.syncPolicy.kind + of spNone: false + of spEveryWrite: true + of spEveryN: h.writeCount mod h.syncPolicy.n == 0 + +proc syncFile(h: WriteHandle): BResult[void] = + case h.mode + of ioDirect: + when defined(macosx): + let syncResult = h.directFile.sync() + if syncResult.isErr: + return err(syncResult.error) + ok() + of ioBuffered: + try: + h.file.flushFile() + when defined(posix): + if fsync(h.file.getFileHandle().cint) < 0: + return err(ioError("Sync failed")) + ok() + except IOError as e: + err(ioError("Sync failed: " & e.msg)) + +proc writeBlock*(h: WriteHandle, data: openArray[byte]): BResult[int] = + case h.mode + of ioDirect: + h.alignedBuf.copyFrom(data) + let writeResult = h.directFile.writeAligned(h.alignedBuf) + if writeResult.isErr: + return err(writeResult.error) + + h.offset += data.len.int64 + h.writeCount += 1 + + if h.shouldSync(): + let syncResult = h.syncFile() + if syncResult.isErr: + return err(syncResult.error) + + ok(data.len) + + of ioBuffered: + if h.syncPolicy.kind == spNone: + let written = h.file.writeBytes(data, 0, data.len) + h.offset += written.int64 + return ok(written) + + try: + let written = 
h.file.writeBytes(data, 0, data.len) + if written != data.len: + return err(ioError("Incomplete write: " & $written & "/" & $data.len)) + h.offset += written.int64 + h.writeCount += 1 + + if h.shouldSync(): + let syncResult = h.syncFile() + if syncResult.isErr: + return err(syncResult.error) + + ok(written) + except IOError as e: + err(ioError("Write failed: " & e.msg)) + +proc currentOffset*(h: WriteHandle): int64 {.inline.} = + h.offset + +proc finalize*(h: WriteHandle, actualSize: int64): BResult[void] = + case h.mode + of ioDirect: + let truncResult = h.directFile.truncateFile(actualSize) + if truncResult.isErr: + return err(truncResult.error) + + when defined(macosx): + let syncResult = h.directFile.sync() + if syncResult.isErr: + return err(syncResult.error) + + ok() + + of ioBuffered: + try: + h.file.flushFile() + when defined(posix): + if fsync(h.file.getFileHandle().cint) < 0: + return err(ioError("Sync failed")) + ok() + except IOError as e: + err(ioError("Finalize failed: " & e.msg)) + +proc close*(h: WriteHandle) = + case h.mode + of ioDirect: + h.directFile.close() + var buf = h.alignedBuf + buf.free() + of ioBuffered: + try: + h.file.close() + except CatchableError: + discard + +proc writeBlockToFile*(path: string, data: openArray[byte], mode: IOMode): BResult[void] = + let parentPath = parentDir(path) + if parentPath.len > 0: + try: + createDir(parentPath) + except OSError as e: + return err(ioError("Failed to create directory: " & e.msg)) + + case mode + of ioDirect: + directio.writeBlockDirect(path, data) + + of ioBuffered: + try: + var f = open(path, fmWrite) + defer: f.close() + + let written = f.writeBytes(data, 0, data.len) + if written != data.len: + return err(ioError("Incomplete write")) + + ok() + except IOError as e: + err(ioError("Write failed: " & e.msg)) + +proc writeBlockBuffered*(path: string, data: openArray[byte]): BResult[File] = + let parentPath = parentDir(path) + if parentPath.len > 0: + try: + createDir(parentPath) + except OSError as e: + return err(ioError("Failed to create directory: " & e.msg)) + + try: + var f = open(path, fmWrite) + let written = f.writeBytes(data, 0, data.len) + if written != data.len: + f.close() + return err(ioError("Incomplete write")) + ok(f) + except IOError as e: + err(ioError("Write failed: " & e.msg)) + +proc syncAndCloseFile*(f: File): BResult[void] = + try: + f.flushFile() + when defined(posix): + if fsync(f.getFileHandle().cint) < 0: + f.close() + return err(ioError("Sync failed")) + when defined(windows): + import std/winlean + if flushFileBuffers(f.getFileHandle()) == 0: + f.close() + return err(ioError("Sync failed")) + f.close() + ok() + except IOError as e: + try: f.close() + except CatchableError: discard + err(ioError("Sync failed: " & e.msg)) + +proc validateChunkSize*(chunkSize: uint32): BResult[void] = + if chunkSize < PageSize.uint32: + return err(ioError("Chunk size must be >= " & $PageSize & " bytes")) + if not isPowerOfTwo(chunkSize): + return err(ioError("Chunk size must be power of 2")) + ok() + +proc syncDir*(dirPath: string): BResult[void] = + when defined(posix): + let fd = posix.open(dirPath.cstring, O_RDONLY) + if fd < 0: + return err(ioError("Failed to open directory for sync: " & dirPath)) + if fsync(fd) < 0: + discard posix.close(fd) + return err(ioError("Failed to sync directory: " & dirPath)) + discard posix.close(fd) + ok() + else: + ok() + +proc atomicRename*(srcPath: string, dstPath: string): BResult[void] = + try: + moveFile(srcPath, dstPath) + ?syncDir(parentDir(dstPath)) + ok() + except 
OSError as e: + err(ioError("Failed to rename file: " & e.msg)) + except Exception as e: + err(ioError("Failed to rename file: " & e.msg)) diff --git a/blockstore/manifest.nim b/blockstore/manifest.nim new file mode 100644 index 0000000..12092af --- /dev/null +++ b/blockstore/manifest.nim @@ -0,0 +1,165 @@ +import std/[math, streams, options, strutils] +import results +import libp2p/multicodec +import ./errors +import ./cid +import ./sha256 +import ./serialization as ser + +const + LogosStorageBlockCodec* = 0xCD02'u32 + Sha256Hcodec* = 0x12'u32 + CidVersionV1* = 1'u8 + +type + Manifest* = object + treeCid*: seq[byte] + blockSize*: uint32 + datasetSize*: uint64 + codec*: uint32 + hcodec*: uint32 + version*: uint8 + filename*: Option[string] + mimetype*: Option[string] + +proc newManifest*(treeCid: seq[byte], blockSize: uint32, datasetSize: uint64): Manifest = + Manifest( + treeCid: treeCid, + blockSize: blockSize, + datasetSize: datasetSize, + codec: LogosStorageBlockCodec, + hcodec: Sha256Hcodec, + version: CidVersionV1, + filename: none(string), + mimetype: none(string) + ) + +proc newManifest*( + treeCid: seq[byte], + blockSize: uint32, + datasetSize: uint64, + filename: Option[string], + mimetype: Option[string] +): Manifest = + Manifest( + treeCid: treeCid, + blockSize: blockSize, + datasetSize: datasetSize, + codec: LogosStorageBlockCodec, + hcodec: Sha256Hcodec, + version: CidVersionV1, + filename: filename, + mimetype: mimetype + ) + +proc blocksCount*(m: Manifest): int = + int(ceilDiv(m.datasetSize, m.blockSize.uint64)) + +proc validate*(m: Manifest): BResult[void] = + if m.treeCid.len == 0: + return err(manifestDecodingError("tree_cid cannot be empty")) + + if m.blockSize == 0: + return err(manifestDecodingError("block_size must be greater than 0")) + + if m.codec != LogosStorageBlockCodec: + return err(manifestDecodingError( + "invalid codec: expected 0xCD02, got 0x" & m.codec.toHex + )) + + if m.hcodec != Sha256Hcodec: + return err(manifestDecodingError( + "invalid hcodec: expected 0x12 (sha2-256), got 0x" & m.hcodec.toHex + )) + + if m.version != CidVersionV1: + return err(manifestDecodingError( + "invalid version: expected 1, got " & $m.version + )) + + ok() + +proc encodeManifest*(m: Manifest): BResult[seq[byte]] = + try: + let s = newStringStream() + + ser.writeBytes(s, m.treeCid) + ser.writeUint32(s, m.blockSize) + ser.writeUint64(s, m.datasetSize) + ser.writeUint32(s, m.codec) + ser.writeUint32(s, m.hcodec) + ser.writeUint8(s, m.version) + + if m.filename.isSome: + ser.writeBool(s, true) + ser.writeString(s, m.filename.get) + else: + ser.writeBool(s, false) + + if m.mimetype.isSome: + ser.writeBool(s, true) + ser.writeString(s, m.mimetype.get) + else: + ser.writeBool(s, false) + + s.setPosition(0) + ok(cast[seq[byte]](s.readAll())) + except CatchableError as e: + err(manifestEncodingError(e.msg)) + +proc decodeManifest*(data: openArray[byte]): BResult[Manifest] = + try: + let dataCopy = @data + let s = newStringStream(cast[string](dataCopy)) + + var m: Manifest + m.treeCid = ?ser.readBytes(s) + m.blockSize = ser.readUint32(s) + m.datasetSize = ser.readUint64(s) + m.codec = ser.readUint32(s) + m.hcodec = ser.readUint32(s) + m.version = ser.readUint8(s) + + if ser.readBool(s): + m.filename = some(?ser.readString(s)) + else: + m.filename = none(string) + + if ser.readBool(s): + m.mimetype = some(?ser.readString(s)) + else: + m.mimetype = none(string) + + ok(m) + except CatchableError as e: + err(manifestDecodingError(e.msg)) + +proc toCid*(m: Manifest): BResult[Cid] = + let 
encoded = ?encodeManifest(m) + let hash = sha256Hash(encoded) + let mh = ?wrap(Sha256Code, hash) + newCidV1(LogosStorageManifest, mh) + +proc fromCidData*(c: Cid, data: openArray[byte]): BResult[Manifest] = + if c.mcodec != LogosStorageManifest: + return err(cidError( + "Expected manifest codec 0xCD01, got 0x" & int(c.mcodec).toHex + )) + + let + manifest = ?decodeManifest(data) + computedCid = ?manifest.toCid() + + if computedCid != c: + return err(cidError("Manifest CID mismatch")) + + ok(manifest) + +proc `$`*(m: Manifest): string = + result = "Manifest(" + result.add("blockSize=" & $m.blockSize) + result.add(", datasetSize=" & $m.datasetSize) + result.add(", blocks=" & $m.blocksCount) + if m.filename.isSome: + result.add(", filename=" & m.filename.get) + result.add(")") diff --git a/blockstore/merkle.nim b/blockstore/merkle.nim new file mode 100644 index 0000000..8cb0bc8 --- /dev/null +++ b/blockstore/merkle.nim @@ -0,0 +1,692 @@ +import std/[sets, options, memfiles, bitops, os] +when defined(posix): + import std/posix +import results +import libp2p/multicodec +import leveldbstatic as leveldb +import ./errors +import ./cid +import ./sha256 + +const + HashSize* = 32 + MerkleTreePrefix* = "merkle:" + MetadataKey = ":meta" + + PackedMagic = 0x534B4C4D'u32 + PackedVersion = 4'u32 + HeaderSize = 17 + EntrySize = 32 + +type + MerkleBackend* = enum + mbEmbeddedProofs + mbLevelDb + mbPacked + + MerkleHash* = array[HashSize, byte] + + MerkleStorage* = ref object of RootObj + + LevelDbMerkleStorage* = ref object of MerkleStorage + db: LevelDb + treeId: string + + PackedMerkleStorage* = ref object of MerkleStorage + path: string + file: File + memFile: MemFile + leafCount: uint64 + numLevels: int + levelFiles: seq[File] + readOnly: bool + + StreamingMerkleBuilder* = ref object + frontier: seq[Option[MerkleHash]] + pendingIndices: seq[uint64] + leafCount: uint64 + storage: MerkleStorage + + MerkleReader* = ref object + storage: MerkleStorage + + MerkleProofNode* = object + hash*: MerkleHash + level*: int + + MerkleProof* = object + index*: uint64 + path*: seq[MerkleProofNode] + leafCount*: uint64 + + MerkleTreeBuilder* = ref object + leaves: seq[array[32, byte]] + tree: seq[seq[array[32, byte]]] + built: bool + +method putHash*(s: MerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.base, raises: [].} +method getHash*(s: MerkleStorage, level: int, index: uint64): Option[MerkleHash] {.base, raises: [].} +method setMetadata*(s: MerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.base, raises: [].} +method getMetadata*(s: MerkleStorage): tuple[leafCount: uint64, numLevels: int] {.base, raises: [].} +method close*(s: MerkleStorage): BResult[void] {.base, gcsafe, raises: [].} +method flush*(s: MerkleStorage) {.base, gcsafe, raises: [].} + +proc computeNumLevels*(leafCount: uint64): int = + if leafCount == 0: return 0 + if leafCount == 1: return 1 + fastLog2(leafCount - 1) + 2 + +proc nodesAtLevel*(leafCount: uint64, level: int): uint64 = + if leafCount == 0: return 0 + if level == 0: return leafCount + if level >= 64: + return if level == 64: 1 else: 0 + let + shifted = leafCount shr level + mask = (1'u64 shl level) - 1 + if (leafCount and mask) > 0: shifted + 1 else: shifted + +proc nodesBeforeLevel*(leafCount: uint64, level: int): uint64 = + result = 0 + for l in 0 ..< level: + result += nodesAtLevel(leafCount, l) + +proc nodePosition*(leafCount: uint64, level: int, index: uint64): uint64 = + nodesBeforeLevel(leafCount, level) + index + +proc hashConcat*(left, 
right: MerkleHash): MerkleHash = + var combined: array[64, byte] + copyMem(addr combined[0], unsafeAddr left[0], 32) + copyMem(addr combined[32], unsafeAddr right[0], 32) + sha256Hash(combined) + +method putHash*(s: MerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.base, raises: [].} = + err(ioError("putHash not implemented")) + +method getHash*(s: MerkleStorage, level: int, index: uint64): Option[MerkleHash] {.base, raises: [].} = + none(MerkleHash) + +method setMetadata*(s: MerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.base, raises: [].} = + err(ioError("setMetadata not implemented")) + +method getMetadata*(s: MerkleStorage): tuple[leafCount: uint64, numLevels: int] {.base, raises: [].} = + (0'u64, 0) + +method close*(s: MerkleStorage): BResult[void] {.base, gcsafe, raises: [].} = + ok() + +method flush*(s: MerkleStorage) {.base, gcsafe, raises: [].} = + discard + +proc levelDbKey(treeId: string, level: int, index: uint64): string = + MerkleTreePrefix & treeId & ":L" & $level & ":I" & $index + +proc levelDbMetaKey(treeId: string): string = + MerkleTreePrefix & treeId & MetadataKey + +proc newLevelDbMerkleStorage*(db: LevelDb, treeId: string): LevelDbMerkleStorage = + LevelDbMerkleStorage(db: db, treeId: treeId) + +method putHash*(s: LevelDbMerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.raises: [].} = + let key = levelDbKey(s.treeId, level, index) + try: + s.db.put(key, cast[string](@hash)) + ok() + except CatchableError as e: + err(databaseError(e.msg)) + except Exception as e: + err(databaseError(e.msg)) + +method getHash*(s: LevelDbMerkleStorage, level: int, index: uint64): Option[MerkleHash] {.raises: [].} = + let key = levelDbKey(s.treeId, level, index) + try: + let valueOpt = s.db.get(key) + if valueOpt.isNone or valueOpt.get.len != HashSize: + return none(MerkleHash) + var hash: MerkleHash + copyMem(addr hash[0], unsafeAddr valueOpt.get[0], HashSize) + some(hash) + except CatchableError: + none(MerkleHash) + except Exception: + none(MerkleHash) + +method setMetadata*(s: LevelDbMerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.raises: [].} = + let key = levelDbMetaKey(s.treeId) + var data: array[9, byte] + copyMem(addr data[0], unsafeAddr leafCount, 8) + var nl = numLevels.uint8 + copyMem(addr data[8], unsafeAddr nl, 1) + try: + s.db.put(key, cast[string](@data)) + ok() + except CatchableError as e: + err(databaseError(e.msg)) + except Exception as e: + err(databaseError(e.msg)) + +method getMetadata*(s: LevelDbMerkleStorage): tuple[leafCount: uint64, numLevels: int] {.raises: [].} = + let key = levelDbMetaKey(s.treeId) + try: + let valueOpt = s.db.get(key) + if valueOpt.isNone or valueOpt.get.len < 9: + return (0'u64, 0) + var + leafCount: uint64 + numLevels: uint8 + copyMem(addr leafCount, unsafeAddr valueOpt.get[0], 8) + copyMem(addr numLevels, unsafeAddr valueOpt.get[8], 1) + (leafCount, numLevels.int) + except CatchableError: + (0'u64, 0) + except Exception: + (0'u64, 0) + +proc levelTempPath(basePath: string, level: int): string = + basePath & ".L" & $level & ".tmp" + +proc newPackedMerkleStorage*(path: string, forWriting: bool = false): BResult[PackedMerkleStorage] = + var storage = PackedMerkleStorage( + path: path, + levelFiles: @[] + ) + + if forWriting: + storage.readOnly = false + storage.leafCount = 0 + storage.numLevels = 0 + storage.file = syncio.open(path, fmReadWrite) + + var header: array[HeaderSize, byte] + var magic = PackedMagic + var version = PackedVersion + 
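+    # Header layout (HeaderSize = 17 bytes): magic (4) | version (4) |
+    # leafCount (8) | numLevels (1). Only magic and version are written here;
+    # setMetadata fills in leafCount/numLevels at offset 8, and level-0
+    # hashes follow the header at EntrySize intervals.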
copyMem(addr header[0], addr magic, 4) + copyMem(addr header[4], addr version, 4) + let written = storage.file.writeBuffer(addr header[0], HeaderSize) + if written != HeaderSize: + storage.file.close() + return err(ioError("Failed to write packed merkle header")) + else: + storage.readOnly = true + storage.memFile = memfiles.open(path, mode = fmRead) + + let + data = cast[ptr UncheckedArray[byte]](storage.memFile.mem) + fileSize = storage.memFile.size + + if fileSize < HeaderSize: + storage.memFile.close() + return err(ioError("File too small for header")) + + var + magic: uint32 + version: uint32 + nl: uint8 + copyMem(addr magic, addr data[0], 4) + copyMem(addr version, addr data[4], 4) + copyMem(addr storage.leafCount, addr data[8], 8) + copyMem(addr nl, addr data[16], 1) + storage.numLevels = nl.int + + if magic != PackedMagic: + storage.memFile.close() + return err(ioError("Invalid packed merkle file magic")) + if version != PackedVersion: + storage.memFile.close() + return err(ioError("Unsupported packed merkle file version: " & $version)) + + ok(storage) + +method putHash*(s: PackedMerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.raises: [].} = + if s.readOnly: + return err(ioError("Storage is read-only")) + + try: + if level == 0: + let offset = HeaderSize + index.int64 * EntrySize + s.file.setFilePos(offset) + let written = s.file.writeBuffer(unsafeAddr hash[0], HashSize) + if written != HashSize: + return err(ioError("Failed to write hash at level 0")) + else: + while s.levelFiles.len < level: + let tempPath = levelTempPath(s.path, s.levelFiles.len + 1) + s.levelFiles.add(syncio.open(tempPath, fmReadWrite)) + let f = s.levelFiles[level - 1] + f.setFilePos(index.int64 * HashSize) + let written = f.writeBuffer(unsafeAddr hash[0], HashSize) + if written != HashSize: + return err(ioError("Failed to write hash at level " & $level)) + ok() + except CatchableError as e: + err(ioError(e.msg)) + except Exception as e: + err(ioError(e.msg)) + +method getHash*(s: PackedMerkleStorage, level: int, index: uint64): Option[MerkleHash] {.raises: [].} = + if s.leafCount == 0: + return none(MerkleHash) + + if index >= nodesAtLevel(s.leafCount, level): + return none(MerkleHash) + + let + position = nodePosition(s.leafCount, level, index) + offset = HeaderSize + position.int64 * EntrySize + + try: + if s.readOnly: + if offset + HashSize > s.memFile.size: + return none(MerkleHash) + let data = cast[ptr UncheckedArray[byte]](s.memFile.mem) + var hash: MerkleHash + copyMem(addr hash[0], addr data[offset], HashSize) + return some(hash) + else: + s.file.setFilePos(offset) + var hash: MerkleHash + let bytesRead = s.file.readBuffer(addr hash[0], HashSize) + if bytesRead != HashSize: + return none(MerkleHash) + return some(hash) + except CatchableError: + none(MerkleHash) + except Exception: + none(MerkleHash) + +method setMetadata*(s: PackedMerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.raises: [].} = + if s.readOnly: + return ok() + s.leafCount = leafCount + s.numLevels = numLevels + + try: + s.file.setFilePos(8) + var + lc = leafCount + nl = numLevels.uint8 + var written = s.file.writeBuffer(addr lc, 8) + if written != 8: + return err(ioError("Failed to write leaf count")) + written = s.file.writeBuffer(addr nl, 1) + if written != 1: + return err(ioError("Failed to write num levels")) + ok() + except CatchableError as e: + err(ioError(e.msg)) + except Exception as e: + err(ioError(e.msg)) + +method getMetadata*(s: PackedMerkleStorage): tuple[leafCount: 
uint64, numLevels: int] {.raises: [].} = + (s.leafCount, s.numLevels) + +method close*(s: PackedMerkleStorage): BResult[void] {.gcsafe, raises: [].} = + try: + if s.readOnly: + s.memFile.close() + else: + s.file.setFilePos(0, fspEnd) + var buffer: array[4096, byte] + for i, levelFile in s.levelFiles: + flushFile(levelFile) + levelFile.setFilePos(0) + while true: + let bytesRead = levelFile.readBuffer(addr buffer[0], buffer.len) + if bytesRead == 0: + break + let written = s.file.writeBuffer(addr buffer[0], bytesRead) + if written != bytesRead: + return err(ioError("Failed to write level " & $(i + 1) & " data")) + levelFile.close() + removeFile(levelTempPath(s.path, i + 1)) + s.levelFiles = @[] + + flushFile(s.file) + when defined(posix): + if fsync(s.file.getFileHandle().cint) != 0: + return err(ioError("fsync failed")) + s.file.close() + ok() + except CatchableError as e: + err(ioError(e.msg)) + except Exception as e: + err(ioError(e.msg)) + +method flush*(s: PackedMerkleStorage) {.gcsafe.} = + if not s.readOnly: + flushFile(s.file) + for levelFile in s.levelFiles: + flushFile(levelFile) + +proc newStreamingMerkleBuilder*(storage: MerkleStorage): StreamingMerkleBuilder = + StreamingMerkleBuilder( + frontier: @[], + pendingIndices: @[], + leafCount: 0, + storage: storage + ) + +proc addLeaf*(builder: StreamingMerkleBuilder, hash: MerkleHash): BResult[void] {.raises: [].} = + try: + var + current = hash + level = 0 + index = builder.leafCount + + while builder.frontier.len <= level: + builder.frontier.add(none(MerkleHash)) + builder.pendingIndices.add(0'u64) + + while level < builder.frontier.len and builder.frontier[level].isSome: + let + sibling = builder.frontier[level].get() + siblingIndex = builder.pendingIndices[level] + + let r1 = builder.storage.putHash(level, siblingIndex, sibling) + if r1.isErr: + return err(r1.error) + let r2 = builder.storage.putHash(level, siblingIndex + 1, current) + if r2.isErr: + return err(r2.error) + + current = hashConcat(sibling, current) + builder.frontier[level] = none(MerkleHash) + + level += 1 + index = index shr 1 + + while builder.frontier.len <= level: + builder.frontier.add(none(MerkleHash)) + builder.pendingIndices.add(0'u64) + + builder.frontier[level] = some(current) + builder.pendingIndices[level] = index + builder.leafCount += 1 + ok() + except CatchableError as e: + err(ioError(e.msg)) + except Exception as e: + err(ioError(e.msg)) + +proc finalize*(builder: StreamingMerkleBuilder): BResult[MerkleHash] {.raises: [].} = + if builder.leafCount == 0: + return err(merkleTreeError("Cannot finalize empty tree")) + + let numLevels = computeNumLevels(builder.leafCount) + + var + current: Option[MerkleHash] = none(MerkleHash) + currentIndex: uint64 = 0 + currentLevel: int = 0 + + for level in 0 ..< builder.frontier.len: + if builder.frontier[level].isSome: + let + hash = builder.frontier[level].get() + index = builder.pendingIndices[level] + + ?builder.storage.putHash(level, index, hash) + + if current.isNone: + current = some(hash) + currentIndex = index + currentLevel = level + else: + while currentLevel < level: + currentLevel += 1 + currentIndex = currentIndex shr 1 + ?builder.storage.putHash(currentLevel, currentIndex, current.get()) + + let combined = hashConcat(hash, current.get()) + current = some(combined) + currentIndex = index shr 1 + currentLevel = level + 1 + + ?builder.storage.putHash(currentLevel, currentIndex, current.get()) + + elif current.isSome and currentLevel == level: + currentLevel += 1 + currentIndex = currentIndex shr 1 + 
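+      # persist the promoted carry node at its new level and halved index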
?builder.storage.putHash(currentLevel, currentIndex, current.get()) + + if current.isNone: + return err(merkleTreeError("Failed to compute root")) + + ?builder.storage.setMetadata(builder.leafCount, numLevels) + ?builder.storage.putHash(numLevels - 1, 0, current.get()) + builder.storage.flush() + + ok(current.get()) + +proc leafCount*(builder: StreamingMerkleBuilder): uint64 = + builder.leafCount + +proc newMerkleReader*(storage: MerkleStorage): MerkleReader = + MerkleReader(storage: storage) + +proc close*(reader: MerkleReader) = + if reader.storage != nil: + discard reader.storage.close() + +proc root*(reader: MerkleReader): Option[MerkleHash] = + let (leafCount, numLevels) = reader.storage.getMetadata() + if numLevels == 0: + return none(MerkleHash) + reader.storage.getHash(numLevels - 1, 0) + +proc leafCount*(reader: MerkleReader): uint64 = + reader.storage.getMetadata().leafCount + +proc getProof*(reader: MerkleReader, index: uint64): BResult[MerkleProof] = + let (leafCount, numLevels) = reader.storage.getMetadata() + + if index >= leafCount: + return err(invalidBlockError()) + + var + path: seq[MerkleProofNode] = @[] + idx = index + + for level in 0 ..< numLevels - 1: + let + siblingIdx = idx xor 1 + maxIdx = nodesAtLevel(leafCount, level) + + if siblingIdx < maxIdx: + let siblingOpt = reader.storage.getHash(level, siblingIdx) + if siblingOpt.isSome: + path.add(MerkleProofNode(hash: siblingOpt.get(), level: level)) + + idx = idx shr 1 + + ok(MerkleProof( + index: index, + path: path, + leafCount: leafCount + )) + +proc newMerkleTreeBuilder*(): MerkleTreeBuilder = + MerkleTreeBuilder( + leaves: @[], + tree: @[], + built: false + ) + +proc addBlock*(builder: MerkleTreeBuilder, blockData: openArray[byte]) = + if builder.built: + raise newException(Defect, "Cannot add blocks after tree has been built") + builder.leaves.add(sha256Hash(blockData)) + +proc buildTree*(builder: MerkleTreeBuilder) = + if builder.built or builder.leaves.len == 0: + return + + builder.tree = @[] + builder.tree.add(builder.leaves) + + var currentLevel = builder.leaves + while currentLevel.len > 1: + var + nextLevel: seq[array[32, byte]] = @[] + i = 0 + while i < currentLevel.len: + if i + 1 < currentLevel.len: + nextLevel.add(hashConcat(currentLevel[i], currentLevel[i + 1])) + else: + nextLevel.add(currentLevel[i]) + i += 2 + builder.tree.add(nextLevel) + currentLevel = nextLevel + + builder.built = true + +proc root*(builder: MerkleTreeBuilder): Option[array[32, byte]] = + if not builder.built or builder.tree.len == 0: + return none(array[32, byte]) + some(builder.tree[^1][0]) + +proc rootCid*(builder: MerkleTreeBuilder): BResult[Cid] = + if not builder.built: + return err(merkleTreeError("Tree not built. Call buildTree() first")) + + let rootOpt = builder.root() + if rootOpt.isNone: + return err(merkleTreeError("Failed to compute merkle root")) + + let mh = ?wrap(Sha256Code, rootOpt.get()) + newCidV1(LogosStorageTree, mh) + +proc blockCount*(builder: MerkleTreeBuilder): int = + builder.leaves.len + +proc getProof*(builder: MerkleTreeBuilder, index: int): BResult[MerkleProof] = + if index < 0 or index >= builder.leaves.len: + return err(invalidBlockError()) + + if not builder.built: + return err(merkleTreeError("Tree not built. 
Call buildTree() first")) + + var + path: seq[MerkleProofNode] = @[] + idx = index + + for level in 0 ..< builder.tree.len - 1: + let siblingIdx = if (idx mod 2) == 0: idx + 1 else: idx - 1 + if siblingIdx < builder.tree[level].len: + path.add(MerkleProofNode(hash: builder.tree[level][siblingIdx], level: level)) + idx = idx div 2 + + ok(MerkleProof( + index: index.uint64, + path: path, + leafCount: builder.leaves.len.uint64 + )) + +proc verify*(proof: MerkleProof, root: MerkleHash, leafHash: MerkleHash): bool = + var + current = leafHash + idx = proof.index + currentLevel = 0 + + for node in proof.path: + while currentLevel < node.level: + idx = idx shr 1 + currentLevel += 1 + + if (idx and 1) == 0: + current = hashConcat(current, node.hash) + else: + current = hashConcat(node.hash, current) + idx = idx shr 1 + currentLevel += 1 + + current == root + +proc verify*(proof: MerkleProof, root: openArray[byte], data: openArray[byte]): BResult[bool] = + if root.len != 32: + return err(invalidProofError()) + + var rootHash: MerkleHash + copyMem(addr rootHash[0], unsafeAddr root[0], 32) + + var + currentHash = sha256Hash(data) + idx = proof.index + currentLevel = 0 + + for node in proof.path: + while currentLevel < node.level: + idx = idx shr 1 + currentLevel += 1 + + if (idx and 1) == 0: + currentHash = hashConcat(currentHash, node.hash) + else: + currentHash = hashConcat(node.hash, currentHash) + idx = idx shr 1 + currentLevel += 1 + + ok(currentHash == rootHash) + +proc rootToCid*(root: MerkleHash, hashCode: MultiCodec, treeCodec: MultiCodec): BResult[Cid] = + let mh = ?wrap(hashCode, root) + newCidV1(treeCodec, mh) + +proc rootToCid*(root: MerkleHash): BResult[Cid] = + rootToCid(root, Sha256Code, LogosStorageTree) + +proc collectLeavesUnderNode(nodeIdx: int, levelSize: int, totalLeaves: int, leaves: var HashSet[int]) + +proc getRequiredLeafIndices*(start: int, count: int, totalLeaves: int): HashSet[int] = + result = initHashSet[int]() + + var have = initHashSet[int]() + for i in start ..< start + count: + have.incl(i) + + var levelSize = totalLeaves + + while levelSize > 1: + var + nextHave = initHashSet[int]() + processedPairs = initHashSet[int]() + + for idx in have: + let pairIdx = idx div 2 + + if pairIdx in processedPairs: + continue + processedPairs.incl(pairIdx) + + let + leftIdx = pairIdx * 2 + rightIdx = pairIdx * 2 + 1 + + haveLeft = leftIdx in have + haveRight = rightIdx < levelSize and rightIdx in have + + if haveLeft and not haveRight and rightIdx < levelSize: + collectLeavesUnderNode(rightIdx, levelSize, totalLeaves, result) + elif not haveLeft and haveRight: + collectLeavesUnderNode(leftIdx, levelSize, totalLeaves, result) + + nextHave.incl(pairIdx) + + levelSize = (levelSize + 1) div 2 + have = nextHave + +proc collectLeavesUnderNode(nodeIdx: int, levelSize: int, totalLeaves: int, leaves: var HashSet[int]) = + var + currentSize = levelSize + levelsToLeaves = 0 + while currentSize < totalLeaves: + currentSize = currentSize * 2 + inc levelsToLeaves + + let + leavesPerNode = 1 shl levelsToLeaves + startLeaf = nodeIdx * leavesPerNode + endLeaf = min((nodeIdx + 1) * leavesPerNode, totalLeaves) + + for leafIdx in startLeaf ..< endLeaf: + leaves.incl(leafIdx) diff --git a/blockstore/multicodec_exts.nim b/blockstore/multicodec_exts.nim new file mode 100644 index 0000000..0c01be7 --- /dev/null +++ b/blockstore/multicodec_exts.nim @@ -0,0 +1,6 @@ +## LogosStorage multicodec extensions for libp2p +const CodecExts* = @[ + ("logos-storage-manifest", 0xCD01), + ("logos-storage-block", 
0xCD02), + ("logos-storage-tree", 0xCD03), +] diff --git a/blockstore/repostore.nim b/blockstore/repostore.nim new file mode 100644 index 0000000..e4e05c9 --- /dev/null +++ b/blockstore/repostore.nim @@ -0,0 +1,436 @@ +import std/[os, locks, atomics, strutils, times, options, tables] +when defined(posix): + import std/posix +import chronos +import chronos/asyncsync +import leveldbstatic as leveldb + +import ./errors +import ./cid +import ./blocks as blk +import ./serialization +import ./sharding +import ./ioutils + +export PendingDeletion, BlockInfo +export IOMode + +const + BlockInfoPrefix = "block_info:" + PendingDeletionsPrefix = "pending_deletions:" + UsedKey = "repo_metadata:used" + +type + SyncWorker* = ref object + mutex: Lock + cond: Cond + running: Atomic[bool] + thread: Thread[pointer] + blocksDir: string + + CidLock* = ref object + lock*: AsyncLock + waiters*: int + + RepoStore* = ref object + blocksDir: string + db: LevelDb + quota: uint64 + used: Atomic[uint64] + ioMode: IOMode + syncBatchSize: int + syncWorker: SyncWorker + writeCount: Atomic[int] + cidLocks: Table[string, CidLock] + +when defined(linux): + proc syncfs(fd: cint): cint {.importc, header: "".} + +proc doSync(blocksDir: string) = + when defined(linux): + let fd = posix.open(blocksDir.cstring, O_RDONLY) + if fd >= 0: + discard syncfs(fd) + discard posix.close(fd) + elif defined(posix): + proc sync() {.importc, header: "".} + sync() + else: + discard + +proc syncWorkerLoop(workerPtr: pointer) {.thread, nimcall.} = + let worker = cast[SyncWorker](workerPtr) + while true: + acquire(worker.mutex) + while worker.running.load(): + wait(worker.cond, worker.mutex) + if not worker.running.load(): + break + release(worker.mutex) + doSync(worker.blocksDir) + acquire(worker.mutex) + release(worker.mutex) + doSync(worker.blocksDir) + break + +proc newSyncWorker*(blocksDir: string): SyncWorker = + result = SyncWorker(blocksDir: blocksDir) + initLock(result.mutex) + initCond(result.cond) + result.running.store(true) + createThread(result.thread, syncWorkerLoop, cast[pointer](result)) + +proc triggerSync*(worker: SyncWorker) = + signal(worker.cond) + +proc stopSyncWorker*(worker: SyncWorker) = + worker.running.store(false) + signal(worker.cond) + joinThread(worker.thread) + deinitCond(worker.cond) + deinitLock(worker.mutex) + +proc calculateUsedFromDb(db: LevelDb): uint64 = + result = 0 + for key, value in db.iter(): + if key.startsWith(BlockInfoPrefix): + let infoResult = deserializeBlockInfo(cast[seq[byte]](value)) + if infoResult.isOk: + result += infoResult.value.size.uint64 + elif key.startsWith(PendingDeletionsPrefix): + let pdResult = deserializePendingDeletion(cast[seq[byte]](value)) + if pdResult.isOk: + result += pdResult.value.size + +proc newRepoStore*(blocksDir: string, db: LevelDb, quota: uint64 = 0, + ioMode: IOMode = ioDirect, + syncBatchSize: int = 0): BResult[RepoStore] = + ?initShardDirectories(blocksDir) + + var used: uint64 = 0 + try: + let usedBytesOpt = db.get(UsedKey) + if usedBytesOpt.isSome and usedBytesOpt.get.len > 0: + let usedResult = deserializeUint64(cast[seq[byte]](usedBytesOpt.get)) + if usedResult.isOk: + used = usedResult.value + else: + used = calculateUsedFromDb(db) + else: + used = calculateUsedFromDb(db) + let usedBytes = ?serializeUint64(used) + db.put(UsedKey, cast[string](usedBytes)) + except LevelDbException as e: + return err(databaseError(e.msg)) + + var syncWorker: SyncWorker = nil + if ioMode == ioBuffered and syncBatchSize > 1: + syncWorker = newSyncWorker(blocksDir) + + var store 
= RepoStore( + blocksDir: blocksDir, + db: db, + quota: quota, + ioMode: ioMode, + syncBatchSize: syncBatchSize, + syncWorker: syncWorker + ) + store.used.store(used) + + ok(store) + +proc close*(store: RepoStore) = + if store.syncWorker != nil: + stopSyncWorker(store.syncWorker) + +proc acquireCidLock*(store: RepoStore, cidStr: string): Future[CidLock] {.async.} = + var cl: CidLock + + if cidStr in store.cidLocks: + cl = store.cidLocks[cidStr] + cl.waiters += 1 + else: + cl = CidLock(lock: newAsyncLock(), waiters: 1) + store.cidLocks[cidStr] = cl + + await cl.lock.acquire() + return cl + +proc releaseCidLock*(store: RepoStore, cl: CidLock, cidStr: string) = + cl.lock.release() + + cl.waiters -= 1 + if cl.waiters == 0: + store.cidLocks.del(cidStr) + +proc used*(store: RepoStore): uint64 {.inline.} = + store.used.load() + +proc decreaseUsed*(store: RepoStore, size: uint64) {.inline.} = + discard store.used.fetchSub(size) + +proc quota*(store: RepoStore): uint64 {.inline.} = + store.quota + +proc wouldExceedQuota*(store: RepoStore, size: uint64): bool {.inline.} = + if store.quota == 0: + return false + store.used() + size > store.quota + +proc blocksDir*(store: RepoStore): string {.inline.} = + store.blocksDir + +proc getBlockPath(store: RepoStore, c: Cid): string {.inline.} = + getShardedPath(store.blocksDir, c) + +proc blockInfoKey(cidStr: string): string {.inline.} = + BlockInfoPrefix & cidStr + +proc pendingDeletionKey(cidStr: string): string {.inline.} = + PendingDeletionsPrefix & cidStr + +proc hasBlock*(store: RepoStore, c: Cid): BResult[bool] {.raises: [].} = + let key = blockInfoKey($c) + try: + let valueOpt = store.db.get(key) + ok(valueOpt.isSome) + except LevelDbException as e: + err(databaseError(e.msg)) + except CatchableError as e: + err(databaseError(e.msg)) + +proc incrementRefCount(store: RepoStore, cidStr: string): BResult[void] = + let key = blockInfoKey(cidStr) + try: + let valueOpt = store.db.get(key) + if valueOpt.isSome: + let infoResult = deserializeBlockInfo(cast[seq[byte]](valueOpt.get)) + if infoResult.isOk: + var info = infoResult.value + info.refCount += 1 + let infoBytes = ?serializeBlockInfo(info) + store.db.put(key, cast[string](infoBytes)) + ok() + except LevelDbException as e: + err(databaseError(e.msg)) + except Exception as e: + err(databaseError(e.msg)) + +proc putBlock*(store: RepoStore, b: blk.Block): Future[BResult[bool]] {.async.} = + let cidStr = $b.cid + let blockPath = store.getBlockPath(b.cid) + let blockSize = b.data.len.uint64 + + let hasIt = ?store.hasBlock(b.cid) + if hasIt: + ?store.incrementRefCount(cidStr) + return ok(false) + + let cl = await store.acquireCidLock(cidStr) + defer: store.releaseCidLock(cl, cidStr) + + let hasIt2 = ?store.hasBlock(b.cid) + if hasIt2: + ?store.incrementRefCount(cidStr) + return ok(false) + + let fileExisted = fileExists(blockPath) + + var newUsed: uint64 + if fileExisted: + newUsed = store.used.load() + else: + if store.wouldExceedQuota(blockSize): + return err(quotaExceededError()) + + case store.ioMode + of ioDirect: + let writeResult = writeBlockToFile(blockPath, b.data, ioDirect) + if writeResult.isErr: + return err(writeResult.error) + + of ioBuffered: + if store.syncBatchSize == 0: + let writeResult = writeBlockToFile(blockPath, b.data, ioBuffered) + if writeResult.isErr: + return err(writeResult.error) + + elif store.syncBatchSize == 1: + let fileResult = writeBlockBuffered(blockPath, b.data) + if fileResult.isErr: + return err(fileResult.error) + let syncResult = syncAndCloseFile(fileResult.value) + 
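+      # per-block durability: fail the put rather than acknowledge a block that could not be synced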
if syncResult.isErr: + return err(syncResult.error) + + else: + let writeResult = writeBlockToFile(blockPath, b.data, ioBuffered) + if writeResult.isErr: + return err(writeResult.error) + let count = store.writeCount.fetchAdd(1) + 1 + if count mod store.syncBatchSize == 0: + store.syncWorker.triggerSync() + + newUsed = store.used.fetchAdd(blockSize) + blockSize + + let info = BlockInfo(size: b.data.len, refCount: 1) + let + infoBytes = ?serializeBlockInfo(info) + usedBytes = ?serializeUint64(newUsed) + try: + store.db.put(blockInfoKey(cidStr), cast[string](infoBytes)) + store.db.put(UsedKey, cast[string](usedBytes)) + ok(not fileExisted) + except LevelDbException as e: + err(databaseError(e.msg)) + except Exception as e: + err(databaseError(e.msg)) + +proc getBlock*(store: RepoStore, c: Cid): Future[BResult[Option[blk.Block]]] {.async.} = + let blockPath = store.getBlockPath(c) + + let hasIt = ?store.hasBlock(c) + if not hasIt: + return ok(none(blk.Block)) + + if not fileExists(blockPath): + return ok(none(blk.Block)) + + var data: seq[byte] + try: + data = cast[seq[byte]](readFile(blockPath)) + except IOError as e: + return err(ioError(e.msg)) + + let b = ?blk.newBlock(data) + if b.cid != c: + return err(cidError("Block CID mismatch")) + + ok(some(b)) + +proc getBlockUnchecked*(store: RepoStore, c: Cid): Future[BResult[Option[blk.Block]]] {.async.} = + let blockPath = store.getBlockPath(c) + + let hasIt = ?store.hasBlock(c) + if not hasIt: + return ok(none(blk.Block)) + + if not fileExists(blockPath): + return ok(none(blk.Block)) + + var data: seq[byte] + try: + data = cast[seq[byte]](readFile(blockPath)) + except IOError as e: + return err(ioError(e.msg)) + + ok(some(blk.fromCidUnchecked(c, data))) + +proc releaseBlock*(store: RepoStore, c: Cid): BResult[bool] = + let cidStr = $c + let blockPath = store.getBlockPath(c) + let key = blockInfoKey(cidStr) + + try: + let valueOpt = store.db.get(key) + if valueOpt.isNone: + return ok(false) + + var info = ?deserializeBlockInfo(cast[seq[byte]](valueOpt.get)) + if info.refCount == 0: + return err(databaseError("Block ref_count already 0")) + + info.refCount -= 1 + + if info.refCount == 0: + let blockSize = info.size.uint64 + let pd = PendingDeletion( + queuedAt: epochTime().uint64, + blockPath: blockPath, + size: blockSize + ) + let pdBytes = ?serializePendingDeletion(pd) + store.db.delete(key) + store.db.put(pendingDeletionKey(cidStr), cast[string](pdBytes)) + return ok(true) + else: + let infoBytes = ?serializeBlockInfo(info) + store.db.put(key, cast[string](infoBytes)) + return ok(false) + + except LevelDbException as e: + err(databaseError(e.msg)) + +proc getPendingDeletions*(store: RepoStore, limit: int): BResult[seq[(string, PendingDeletion)]] = + var entries: seq[(string, PendingDeletion)] = @[] + try: + for key, value in store.db.iter(): + if not key.startsWith(PendingDeletionsPrefix): + continue + let cidStr = key[PendingDeletionsPrefix.len .. 
^1] + let pdResult = deserializePendingDeletion(cast[seq[byte]](value)) + if pdResult.isOk: + entries.add((cidStr, pdResult.value)) + if entries.len >= limit: + break + ok(entries) + except LevelDbException as e: + err(databaseError(e.msg)) + +proc pendingDeletionsCount*(store: RepoStore): BResult[int] = + var count = 0 + try: + for key, _ in store.db.iter(): + if key.startsWith(PendingDeletionsPrefix): + inc count + ok(count) + except LevelDbException as e: + err(databaseError(e.msg)) + +proc deletePendingBlock*(store: RepoStore, c: Cid, blockPath: string, size: uint64): Future[BResult[bool]] {.async.} = + let hasIt = ?store.hasBlock(c) + if hasIt: + return ok(false) + + if fileExists(blockPath): + try: + removeFile(blockPath) + discard store.used.fetchSub(size) + except OSError as e: + return err(ioError(e.msg)) + + ok(true) + +proc removePendingDeletion*(store: RepoStore, cidStr: string): BResult[void] = + try: + store.db.delete(pendingDeletionKey(cidStr)) + ok() + except LevelDbException as e: + err(databaseError(e.msg)) + +proc removePendingDeletionsBatch*(store: RepoStore, cidStrs: seq[string]): BResult[void] = + if cidStrs.len == 0: + return ok() + let currentUsed = store.used.load() + let usedBytes = ?serializeUint64(currentUsed) + try: + let batch = newBatch() + for cidStr in cidStrs: + batch.delete(pendingDeletionKey(cidStr)) + batch.put(UsedKey, cast[string](usedBytes)) + store.db.write(batch) + ok() + except LevelDbException as e: + err(databaseError(e.msg)) + +proc totalSize*(store: RepoStore): BResult[uint64] = + var total: uint64 = 0 + try: + for key, value in store.db.iter(): + if key.startsWith(BlockInfoPrefix): + let infoResult = deserializeBlockInfo(cast[seq[byte]](value)) + if infoResult.isOk: + total += infoResult.value.size.uint64 + ok(total) + except LevelDbException as e: + err(databaseError(e.msg)) diff --git a/blockstore/serialization.nim b/blockstore/serialization.nim new file mode 100644 index 0000000..a933d7f --- /dev/null +++ b/blockstore/serialization.nim @@ -0,0 +1,171 @@ +import std/[streams, endians] +import results + +import ./errors + +proc writeUint8*(s: Stream, v: uint8) = + s.write(v) + +proc readUint8*(s: Stream): uint8 = + s.read(result) + +proc writeUint16*(s: Stream, v: uint16) = + var le: uint16 + littleEndian16(addr le, unsafeAddr v) + s.write(le) + +proc readUint16*(s: Stream): uint16 = + var le: uint16 + s.read(le) + littleEndian16(addr result, addr le) + +proc writeUint32*(s: Stream, v: uint32) = + var le: uint32 + littleEndian32(addr le, unsafeAddr v) + s.write(le) + +proc readUint32*(s: Stream): uint32 = + var le: uint32 + s.read(le) + littleEndian32(addr result, addr le) + +proc writeUint64*(s: Stream, v: uint64) = + var le: uint64 + littleEndian64(addr le, unsafeAddr v) + s.write(le) + +proc readUint64*(s: Stream): uint64 = + var le: uint64 + s.read(le) + littleEndian64(addr result, addr le) + +proc writeInt64*(s: Stream, v: int64) = + writeUint64(s, cast[uint64](v)) + +proc readInt64*(s: Stream): int64 = + cast[int64](readUint64(s)) + +proc writeBytes*(s: Stream, data: openArray[byte]) = + s.writeUint64(data.len.uint64) + if data.len > 0: + s.writeData(unsafeAddr data[0], data.len) + +proc readBytes*(s: Stream): BResult[seq[byte]] = + let len = s.readUint64().int + if len > 0: + var data = newSeq[byte](len) + let bytesRead = s.readData(addr data[0], len) + if bytesRead != len: + return err(ioError("Failed to read " & $len & " bytes, got " & $bytesRead)) + ok(data) + else: + ok(newSeq[byte]()) + +proc writeString*(s: Stream, str: string) 
= + writeBytes(s, cast[seq[byte]](str)) + +proc readString*(s: Stream): BResult[string] = + let bytes = ?readBytes(s) + var res = newString(bytes.len) + if bytes.len > 0: + copyMem(addr res[0], unsafeAddr bytes[0], bytes.len) + ok(res) + +proc writeBool*(s: Stream, v: bool) = + s.writeUint8(if v: 1 else: 0) + +proc readBool*(s: Stream): bool = + s.readUint8() != 0 + +proc toBytes*[T](obj: T, writer: proc(s: Stream, v: T) {.gcsafe.}): BResult[seq[byte]] {.gcsafe.} = + try: + let s = newStringStream() + {.cast(gcsafe).}: + writer(s, obj) + s.setPosition(0) + ok(cast[seq[byte]](s.readAll())) + except Exception as e: + err(serializationError("serialization failed: " & e.msg)) + +proc fromBytes*[T](data: openArray[byte], reader: proc(s: Stream): T {.gcsafe.}): BResult[T] {.gcsafe.} = + try: + var str = newString(data.len) + if data.len > 0: + copyMem(addr str[0], unsafeAddr data[0], data.len) + let s = newStringStream(str) + {.cast(gcsafe).}: + ok(reader(s)) + except Exception as e: + err(deserializationError("deserialization failed: " & e.msg)) + +proc fromBytesResult*[T](data: openArray[byte], reader: proc(s: Stream): BResult[T] {.gcsafe.}): BResult[T] {.gcsafe.} = + try: + var str = newString(data.len) + if data.len > 0: + copyMem(addr str[0], unsafeAddr data[0], data.len) + let s = newStringStream(str) + {.cast(gcsafe).}: + reader(s) + except Exception as e: + err(deserializationError("deserialization failed: " & e.msg)) + +type + BlockInfo* = object + size*: int + refCount*: uint32 + +proc writeBlockInfo*(s: Stream, info: BlockInfo) {.gcsafe.} = + s.writeUint64(info.size.uint64) + s.writeUint32(info.refCount) + +proc readBlockInfo*(s: Stream): BlockInfo {.gcsafe.} = + result.size = s.readUint64().int + result.refCount = s.readUint32() + +proc serializeBlockInfo*(info: BlockInfo): BResult[seq[byte]] = + toBytes(info, writeBlockInfo) + +proc deserializeBlockInfo*(data: openArray[byte]): BResult[BlockInfo] = + fromBytes(data, readBlockInfo) + +type + PendingDeletion* = object + queuedAt*: uint64 + blockPath*: string + size*: uint64 + +proc writePendingDeletion*(s: Stream, pd: PendingDeletion) {.gcsafe.} = + s.writeUint64(pd.queuedAt) + s.writeString(pd.blockPath) + s.writeUint64(pd.size) + +proc readPendingDeletion*(s: Stream): BResult[PendingDeletion] {.gcsafe.} = + var pd: PendingDeletion + pd.queuedAt = s.readUint64() + pd.blockPath = ?s.readString() + pd.size = s.readUint64() + ok(pd) + +proc serializePendingDeletion*(pd: PendingDeletion): BResult[seq[byte]] = + toBytes(pd, writePendingDeletion) + +proc deserializePendingDeletion*(data: openArray[byte]): BResult[PendingDeletion] = + fromBytesResult(data, readPendingDeletion) + +proc serializeUint64*(v: uint64): BResult[seq[byte]] = + try: + let s = newStringStream() + s.writeUint64(v) + s.setPosition(0) + ok(cast[seq[byte]](s.readAll())) + except Exception as e: + err(serializationError("serialization failed: " & e.msg)) + +proc deserializeUint64*(data: openArray[byte]): BResult[uint64] = + if data.len < 8: + return err(deserializationError("Data too short for uint64")) + var str = newString(data.len) + if data.len > 0: + copyMem(addr str[0], unsafeAddr data[0], data.len) + let s = newStringStream(str) + ok(s.readUint64()) diff --git a/blockstore/sha256.nim b/blockstore/sha256.nim new file mode 100644 index 0000000..31bcc7e --- /dev/null +++ b/blockstore/sha256.nim @@ -0,0 +1,31 @@ +## Compile with: +## -d:useConstantine to use Constantine's SHA256 implementation +## -d:useBlake3 to use BLAKE3 (hashlib) for benchmarking +## Default uses 
nimcrypto SHA256 +when defined(useBlake3): + import hashlib/misc/blake3 + + proc sha256Hash*(data: openArray[byte]): array[32, byte] = + ## Compute BLAKE3 hash (32 bytes, same size as SHA256) + var ctx: Context[BLAKE3] + ctx.init() + ctx.update(data) + ctx.final(result) + +elif defined(useConstantine): + import constantine/hashes + + proc sha256Hash*(data: openArray[byte]): array[32, byte] = + ## Compute SHA2-256 hash using Constantine + result = hashes.sha256.hash(data) + +else: + import nimcrypto/sha2 + + proc sha256Hash*(data: openArray[byte]): array[32, byte] = + ## Compute SHA2-256 hash using nimcrypto + var ctx: sha256 + ctx.init() + ctx.update(data) + result = ctx.finish().data + ctx.clear() diff --git a/blockstore/sharding.nim b/blockstore/sharding.nim new file mode 100644 index 0000000..9b19d76 --- /dev/null +++ b/blockstore/sharding.nim @@ -0,0 +1,52 @@ +import std/os +import results + +import ./errors +import ./cid + +const + Base32Chars* = "abcdefghijklmnopqrstuvwxyz234567" + TmpDirName* = "tmp" + +proc initShardDirectories*(baseDir: string): BResult[void] = + let marker = baseDir / ".shards_initialized" + + if fileExists(marker): + return ok() + + try: + createDir(baseDir) + discard existsOrCreateDir(baseDir / TmpDirName) + + for c1 in Base32Chars: + let level1 = baseDir / $c1 + discard existsOrCreateDir(level1) + for c2 in Base32Chars: + let level2 = level1 / $c2 + discard existsOrCreateDir(level2) + + writeFile(marker, "") + ok() + except OSError as e: + err(ioError(e.msg)) + +proc cleanupTmpDir*(baseDir: string) = + let tmpDir = baseDir / TmpDirName + if dirExists(tmpDir): + try: + removeDir(tmpDir) + createDir(tmpDir) + except OSError: + discard + +proc getTmpPath*(baseDir: string, name: string, ext: string = ""): string = + baseDir / TmpDirName / (name & ext) + +proc getShardedPathStr*(baseDir: string, cidStr: string, ext: string = ""): string = + let len = cidStr.len + let d1 = cidStr[len - 2 .. len - 2] + let d2 = cidStr[len - 1 .. len - 1] + baseDir / d1 / d2 / (cidStr & ext) + +proc getShardedPath*(baseDir: string, c: Cid, ext: string = ""): string = + getShardedPathStr(baseDir, $c, ext) diff --git a/nim.cfg b/nim.cfg new file mode 100644 index 0000000..804a3ca --- /dev/null +++ b/nim.cfg @@ -0,0 +1,4 @@ +--path:"." 
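+# Optional hash backends (see blockstore/sha256.nim): -d:useConstantine or -d:useBlake3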
+# Register blockstore specific multicodecs with libp2p +-d:libp2p_multicodec_exts="blockstore/multicodec_exts.nim" +-d:libp2p_contentids_exts="blockstore/contentids_exts.nim" diff --git a/tests/bench_dataset.nim b/tests/bench_dataset.nim new file mode 100644 index 0000000..702d49c --- /dev/null +++ b/tests/bench_dataset.nim @@ -0,0 +1,498 @@ +import std/[os, times, strformat, random, options, strutils] +import chronos +import taskpools +import results +import ../blockstore/errors +import ../blockstore/blocks +import ../blockstore/chunker +import ../blockstore/dataset +import ../blockstore/cid +import ../blockstore/merkle +import ../blockstore/ioutils +import ../blockstore/blockmap + +when defined(posix): + import std/posix +elif defined(windows): + import std/winlean + +const + DefaultSize = 4'u64 * 1024 * 1024 * 1024 + DefaultChunkSize = 64 * 1024 + DefaultPoolSize = 4 + TestDir = "nim_blockstore_bench" + TestFile = TestDir / "testfile.bin" + DbPath = TestDir / "bench_db" + BlocksDir = TestDir / "blocks" + +type + BenchConfig = object + totalSize: uint64 + chunkSize: int + merkleBackend: MerkleBackend + blockBackend: BlockBackend + blockmapBackend: BlockmapBackend + ioMode: IOMode + syncBatchSize: int + synthetic: bool + reportInterval: float + poolSize: int + blockHashConfig: BlockHashConfig + +proc formatSize(bytes: uint64): string = + if bytes >= 1024'u64 * 1024 * 1024 * 1024: + &"{bytes.float / (1024 * 1024 * 1024 * 1024):.2f} TB" + elif bytes >= 1024'u64 * 1024 * 1024: + &"{bytes.float / (1024 * 1024 * 1024):.2f} GB" + elif bytes >= 1024'u64 * 1024: + &"{bytes.float / (1024 * 1024):.2f} MB" + else: + &"{bytes} bytes" + +proc formatRate(bytesPerSec: float): string = + if bytesPerSec >= 1024 * 1024 * 1024: + &"{bytesPerSec / (1024 * 1024 * 1024):.2f} GB/s" + elif bytesPerSec >= 1024 * 1024: + &"{bytesPerSec / (1024 * 1024):.2f} MB/s" + else: + &"{bytesPerSec / 1024:.2f} KB/s" + +proc parseSize(s: string): uint64 = + var num = s + var multiplier: uint64 = 1 + + if s.endsWith("TB") or s.endsWith("tb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 * 1024 * 1024 + elif s.endsWith("GB") or s.endsWith("gb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 * 1024 + elif s.endsWith("MB") or s.endsWith("mb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 + elif s.endsWith("KB") or s.endsWith("kb"): + num = s[0..^3] + multiplier = 1024'u64 + + try: + result = uint64(parseInt(num)) * multiplier + except ValueError: + result = DefaultSize + +proc syncFile(f: File) = + flushFile(f) + when defined(posix): + discard fsync(f.getFileHandle().cint) + elif defined(windows): + discard flushFileBuffers(f.getFileHandle()) + +proc createTestFile(path: string, size: uint64) = + echo &"Creating {formatSize(size)} test file..." 
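+  # write 1 MiB buffers of random bytes, then sync before closing so the timing covers getting data to disk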
+ let startTime = epochTime() + + randomize() + var f = open(path, fmWrite) + + const bufSize = 1024 * 1024 + var buf = newSeq[byte](bufSize) + var remaining = size + + while remaining > 0: + for i in 0 ..< bufSize: + buf[i] = byte(rand(255)) + + let writeSize = min(remaining, bufSize.uint64) + discard f.writeBytes(buf, 0, writeSize.int) + remaining -= writeSize + + syncFile(f) + f.close() + + let elapsed = epochTime() - startTime + let rate = size.float / elapsed + echo &" Created in {elapsed:.2f}s ({formatRate(rate)})" + +proc cleanup() = + if dirExists(TestDir): + removeDir(TestDir) + +proc runBenchmark(config: BenchConfig) {.async.} = + echo "=== Dataset Ingestion Benchmark ===" + echo &"Size: {formatSize(config.totalSize)}" + echo &"Chunk size: {config.chunkSize div 1024} KB" + echo &"Expected blocks: {config.totalSize div config.chunkSize.uint64}" + echo &"Merkle backend: {config.merkleBackend}" + echo &"Block backend: {config.blockBackend}" + echo &"Blockmap backend: {config.blockmapBackend}" + echo &"IO mode: {config.ioMode}" + echo &"Sync batch size: {config.syncBatchSize}" + echo &"Thread pool size: {config.poolSize}" + echo &"Data mode: {(if config.synthetic: \"synthetic\" else: \"file-based (async)\")}" + echo "" + + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + if not config.synthetic: + createTestFile(TestFile, config.totalSize) + echo "" + + echo "Initializing dataset store..." + let storeResult = newDatasetStore(DbPath, BlocksDir, + blockHashConfig = config.blockHashConfig, + merkleBackend = config.merkleBackend, + blockBackend = config.blockBackend, + blockmapBackend = config.blockmapBackend, + ioMode = config.ioMode, + syncBatchSize = config.syncBatchSize) + if storeResult.isErr: + echo &"Failed to create store: {storeResult.error}" + return + + let store = storeResult.value + + let filename = if config.synthetic: some("benchmark") else: some(TestFile) + let builderResult = store.startDataset(config.chunkSize.uint32, filename) + if builderResult.isErr: + echo &"Failed to start dataset: {builderResult.error}" + return + + var builder = builderResult.value + + echo "Ingesting blocks..." 
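+  # Synthetic mode reuses one random chunk and stamps the block counter into its first four bytes so every
+  # block gets a distinct CID; file mode streams blocks from the test file through the async chunker.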
+ let ingestStart = epochTime() + var blockCount: uint64 = 0 + var totalBytes: uint64 = 0 + var lastReport = ingestStart + var lastBytes: uint64 = 0 + let totalBlocks = config.totalSize div config.chunkSize.uint64 + + if config.synthetic: + var chunk = newSeq[byte](config.chunkSize) + randomize() + for i in 0 ..< config.chunkSize: + chunk[i] = byte(rand(255)) + + while totalBytes < config.totalSize: + chunk[0] = byte(blockCount and 0xFF) + chunk[1] = byte((blockCount shr 8) and 0xFF) + chunk[2] = byte((blockCount shr 16) and 0xFF) + chunk[3] = byte((blockCount shr 24) and 0xFF) + + let blkResult = newBlock(chunk, config.blockHashConfig) + if blkResult.isErr: + echo &"Failed to create block: {blkResult.error}" + break + + let blk = blkResult.value + totalBytes += blk.data.len.uint64 + + let addResult = await builder.addBlock(blk) + if addResult.isErr: + echo &"Failed to add block: {addResult.error}" + break + + blockCount += 1 + + let now = epochTime() + if now - lastReport >= config.reportInterval: + let intervalBytes = totalBytes - lastBytes + let intervalRate = intervalBytes.float / (now - lastReport) + let overallRate = totalBytes.float / (now - ingestStart) + let progress = (blockCount.float / totalBlocks.float) * 100 + let eta = if overallRate > 0: (config.totalSize - totalBytes).float / overallRate else: 0.0 + echo &" Progress: {progress:.1f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s" + lastReport = now + lastBytes = totalBytes + else: + var pool = Taskpool.new(numThreads = config.poolSize) + defer: pool.shutdown() + + let streamResult = await builder.chunkFile(pool) + if streamResult.isErr: + echo &"Failed to open file: {streamResult.error}" + return + + var stream = streamResult.value + + while true: + let blockOpt = await stream.nextBlock() + if blockOpt.isNone: + break + + let blockResult = blockOpt.get() + if blockResult.isErr: + echo &"Block read error: {blockResult.error}" + break + + let blk = blockResult.value + totalBytes += blk.data.len.uint64 + + let addResult = await builder.addBlock(blk) + if addResult.isErr: + echo &"Failed to add block: {addResult.error}" + break + + blockCount += 1 + + let now = epochTime() + if now - lastReport >= config.reportInterval: + let intervalBytes = totalBytes - lastBytes + let intervalRate = intervalBytes.float / (now - lastReport) + let overallRate = totalBytes.float / (now - ingestStart) + let progress = (blockCount.float / totalBlocks.float) * 100 + let eta = if overallRate > 0: (config.totalSize - totalBytes).float / overallRate else: 0.0 + echo &" Progress: {progress:.1f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s" + lastReport = now + lastBytes = totalBytes + + stream.close() + + let ingestEnd = epochTime() + let ingestTime = ingestEnd - ingestStart + let ingestRate = totalBytes.float / ingestTime + + echo "" + echo "Ingestion complete:" + echo &" Blocks: {blockCount}" + echo &" Bytes: {formatSize(totalBytes)}" + echo &" Time: {ingestTime:.2f}s" + echo &" Rate: {formatRate(ingestRate)}" + echo "" + + echo "Finalizing dataset (building merkle tree)..." 
+ let finalizeStart = epochTime() + + let datasetResult = await builder.finalize() + if datasetResult.isErr: + echo &"Failed to finalize: {datasetResult.error}" + return + + let dataset = datasetResult.value + let finalizeEnd = epochTime() + let finalizeTime = finalizeEnd - finalizeStart + + echo &" Finalize time: {finalizeTime:.2f}s" + echo "" + + let totalTime = ingestTime + finalizeTime + let overallRate = totalBytes.float / totalTime + + echo "=== Write Summary ===" + echo &" Dataset manifest CID: {dataset.manifestCid}" + echo &" Dataset tree CID: {dataset.treeCid}" + echo &" Total blocks: {dataset.blockCount}" + echo &" Total time: {totalTime:.2f}s" + echo &" Overall rate: {formatRate(overallRate)}" + echo &" Storage used: {formatSize(store.used())}" + echo "" + + echo "=== Read Benchmark (without verification) ===" + echo "Reading all blocks..." + + let readStart = epochTime() + var readBytes: uint64 = 0 + var readBlocks = 0 + var lastReadReport = readStart + var lastReadBytes: uint64 = 0 + + for i in 0 ..< dataset.blockCount: + let blockResult = await dataset.getBlock(i) + if blockResult.isErr: + echo &"Failed to read block {i}: {blockResult.error}" + break + + let blockOpt = blockResult.value + if blockOpt.isNone: + echo &"Block {i} not found" + break + + let (blk, _) = blockOpt.get() + readBytes += blk.data.len.uint64 + readBlocks += 1 + + let now = epochTime() + if now - lastReadReport >= config.reportInterval: + let intervalBytes = readBytes - lastReadBytes + let intervalRate = intervalBytes.float / (now - lastReadReport) + let overallReadRate = readBytes.float / (now - readStart) + let progress = (readBytes.float / totalBytes.float) * 100 + echo &" Progress: {progress:.1f}% | Blocks: {readBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallReadRate)})" + lastReadReport = now + lastReadBytes = readBytes + + let readEnd = epochTime() + let readTime = readEnd - readStart + let readRate = readBytes.float / readTime + + echo "" + echo "Read complete (no verification):" + echo &" Blocks read: {readBlocks}" + echo &" Bytes read: {formatSize(readBytes)}" + echo &" Time: {readTime:.2f}s" + echo &" Rate: {formatRate(readRate)}" + echo "" + + echo "=== Read Benchmark (with verification) ===" + echo "Reading and verifying all blocks..." + + let mhashResult = dataset.treeCid.mhash() + if mhashResult.isErr: + echo &"Failed to get multihash from treeCid: {mhashResult.error}" + return + + let mhash = mhashResult.value + var rootHash: MerkleHash + let digestBytes = mhash.data.buffer + if digestBytes.len >= HashSize + 2: + copyMem(addr rootHash[0], unsafeAddr digestBytes[2], HashSize) + else: + echo "Invalid multihash length" + return + + let verifyStart = epochTime() + var verifiedBlocks = 0 + var verifiedBytes: uint64 = 0 + var verifyFailed = 0 + var lastVerifyReport = verifyStart + var lastVerifyBytes: uint64 = 0 + + for i in 0 ..< dataset.blockCount: + let blockResult = await dataset.getBlock(i) + if blockResult.isErr: + echo &"Failed to read block {i}: {blockResult.error}" + break + + let blockOpt = blockResult.value + if blockOpt.isNone: + echo &"Block {i} not found" + break + + let (blk, proof) = blockOpt.get() + + let leafHash = config.blockHashConfig.hashFunc(blk.data) + if not verify(proof, rootHash, leafHash): + verifyFailed += 1 + if verifyFailed <= 5: + echo &" WARNING: Block {i} verification failed!" 
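+    # every block read is counted below; proof failures are tallied in verifyFailed and only the first five are printed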
+ + verifiedBlocks += 1 + verifiedBytes += blk.data.len.uint64 + + let now = epochTime() + if now - lastVerifyReport >= config.reportInterval: + let intervalBytes = verifiedBytes - lastVerifyBytes + let intervalRate = intervalBytes.float / (now - lastVerifyReport) + let overallVerifyRate = verifiedBytes.float / (now - verifyStart) + let progress = (verifiedBytes.float / totalBytes.float) * 100 + echo &" Progress: {progress:.1f}% | Verified: {verifiedBlocks} | Failed: {verifyFailed} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallVerifyRate)})" + lastVerifyReport = now + lastVerifyBytes = verifiedBytes + + let verifyEnd = epochTime() + let verifyTime = verifyEnd - verifyStart + let verifyRate = verifiedBytes.float / verifyTime + + echo "" + echo "Read with verification complete:" + echo &" Blocks verified: {verifiedBlocks}" + echo &" Verification failures: {verifyFailed}" + echo &" Bytes verified: {formatSize(verifiedBytes)}" + echo &" Time: {verifyTime:.2f}s" + echo &" Rate: {formatRate(verifyRate)}" + echo "" + + echo "Closing store..." + await store.closeAsync() + + echo "Cleaning up..." + cleanup() + echo "Done!" + +proc printUsage() = + echo "Usage: bench_dataset [options]" + echo "" + echo "Options:" + echo " --size= Dataset size (e.g., 1GB, 4GB, 100GB, 1TB)" + echo " --chunk= Chunk size in KB (default: 64)" + echo " --merkle= Merkle backend: embedded, leveldb, packed (default: packed)" + echo " --blocks= Block backend: sharded, packed (default: sharded)" + echo " --blockmap= Blockmap backend: leveldb, file (default: leveldb)" + echo " --io= I/O mode: direct, buffered (default: direct)" + echo " --sync= Sync batch: none, every, or N (default: none)" + echo " --pool= Thread pool size for async I/O (default: 4, min: 2)" + echo " --synthetic Use synthetic in-memory data (no file I/O)" + echo " --help Show this help" + +proc main() = + var config = BenchConfig( + totalSize: DefaultSize, + chunkSize: DefaultChunkSize, + merkleBackend: mbPacked, + blockBackend: bbSharded, + blockmapBackend: bmLevelDb, + ioMode: ioDirect, + syncBatchSize: 0, + synthetic: false, + reportInterval: 1.0, + poolSize: DefaultPoolSize, + blockHashConfig: defaultBlockHashConfig() + ) + + for arg in commandLineParams(): + if arg.startsWith("--size="): + config.totalSize = parseSize(arg[7..^1]) + elif arg.startsWith("--chunk="): + config.chunkSize = parseInt(arg[8..^1]) * 1024 + elif arg.startsWith("--merkle="): + let backend = arg[9..^1] + case backend + of "embedded", "embeddedproofs": config.merkleBackend = mbEmbeddedProofs + of "leveldb": config.merkleBackend = mbLevelDb + of "packed": config.merkleBackend = mbPacked + else: echo &"Unknown merkle backend: {backend}"; return + elif arg.startsWith("--blocks="): + let backend = arg[9..^1] + case backend + of "sharded": config.blockBackend = bbSharded + of "packed": config.blockBackend = bbPacked + else: echo &"Unknown block backend: {backend}"; return + elif arg.startsWith("--blockmap="): + let backend = arg[11..^1] + case backend + of "leveldb": config.blockmapBackend = bmLevelDb + of "file": config.blockmapBackend = bmFile + else: echo &"Unknown blockmap backend: {backend}"; return + elif arg.startsWith("--io="): + let mode = arg[5..^1] + case mode + of "direct": config.ioMode = ioDirect + of "buffered": config.ioMode = ioBuffered + else: echo &"Unknown IO mode: {mode}"; return + elif arg.startsWith("--sync="): + let value = arg[7..^1] + if value == "none": + config.syncBatchSize = 0 + elif value == "every": + config.syncBatchSize = 1 + else: + try: + 
config.syncBatchSize = parseInt(value) + except ValueError: + echo &"Invalid sync batch size: {value}"; return + elif arg.startsWith("--pool="): + try: + config.poolSize = max(2, parseInt(arg[7..^1])) + except ValueError: + echo &"Invalid pool size: {arg[7..^1]}"; return + elif arg == "--synthetic": + config.synthetic = true + elif arg == "--help": + printUsage() + return + + waitFor runBenchmark(config) + +when isMainModule: + main() diff --git a/tests/bench_merkle.nim b/tests/bench_merkle.nim new file mode 100644 index 0000000..5afc56d --- /dev/null +++ b/tests/bench_merkle.nim @@ -0,0 +1,319 @@ +import std/[os, times, strformat, random, options, strutils] +import chronos +import results +import leveldbstatic as leveldb +import ../blockstore/errors +import ../blockstore/blocks +import ../blockstore/dataset +import ../blockstore/merkle +import ../blockstore/sha256 +import ../blockstore/cid + +proc toHexStr(data: openArray[byte]): string = + result = "" + for b in data: + result.add(b.toHex(2)) + +const + DefaultChunkSize = 64 * 1024 + TestDir = "bench_merkle_streaming" + DbPath = TestDir / "db" + BlocksDir = TestDir / "blocks" + TreesDir = TestDir / "trees" + +type + BenchConfig = object + totalSize: uint64 + chunkSize: int + backend: MerkleBackend + storeBlocks: bool + reportInterval: float + +proc formatSize(bytes: uint64): string = + if bytes >= 1024'u64 * 1024 * 1024 * 1024: + &"{bytes.float / (1024 * 1024 * 1024 * 1024):.2f} TB" + elif bytes >= 1024'u64 * 1024 * 1024: + &"{bytes.float / (1024 * 1024 * 1024):.2f} GB" + elif bytes >= 1024'u64 * 1024: + &"{bytes.float / (1024 * 1024):.2f} MB" + else: + &"{bytes} bytes" + +proc formatRate(bytesPerSec: float): string = + if bytesPerSec >= 1024 * 1024 * 1024: + &"{bytesPerSec / (1024 * 1024 * 1024):.2f} GB/s" + elif bytesPerSec >= 1024 * 1024: + &"{bytesPerSec / (1024 * 1024):.2f} MB/s" + else: + &"{bytesPerSec / 1024:.2f} KB/s" + +proc cleanup() = + if dirExists(TestDir): + removeDir(TestDir) + +proc runMerkleOnlyBenchmark(config: BenchConfig) = + echo "=== Merkle Tree Only Benchmark ===" + echo &"Simulated size: {formatSize(config.totalSize)}" + echo &"Chunk size: {config.chunkSize div 1024} KB" + echo &"Expected blocks: {config.totalSize div config.chunkSize.uint64}" + echo &"Backend: {config.backend}" + echo "" + + cleanup() + createDir(TestDir) + createDir(TreesDir) + + let treeId = "bench_" & $epochTime().int + var storage: MerkleStorage + case config.backend + of mbPacked: + let treePath = TreesDir / (treeId & ".tree") + storage = newPackedMerkleStorage(treePath, forWriting = true).get() + of mbLevelDb: + let db = leveldb.open(DbPath) + storage = newLevelDbMerkleStorage(db, "bench") + of mbEmbeddedProofs: + echo "Embedded proofs backend not supported for this benchmark" + return + + var builder = newStreamingMerkleBuilder(storage) + + var chunk = newSeq[byte](config.chunkSize) + randomize() + for i in 0 ..< config.chunkSize: + chunk[i] = byte(rand(255)) + + echo "Building merkle tree..." 
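+  # Leaves are SHA-256 hashes of a single reused chunk with the block counter stamped into its first four
+  # bytes; no block data is written, so this isolates the cost of building and persisting the tree.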
+ let startTime = epochTime() + var blockCount: uint64 = 0 + var processedBytes: uint64 = 0 + var lastReport = startTime + var lastBytes: uint64 = 0 + let totalBlocks = config.totalSize div config.chunkSize.uint64 + + while processedBytes < config.totalSize: + chunk[0] = byte(blockCount and 0xFF) + chunk[1] = byte((blockCount shr 8) and 0xFF) + chunk[2] = byte((blockCount shr 16) and 0xFF) + chunk[3] = byte((blockCount shr 24) and 0xFF) + + let leafHash = sha256Hash(chunk) + let addResult = builder.addLeaf(leafHash) + if addResult.isErr: + echo &"Error adding leaf: {addResult.error.msg}" + return + + blockCount += 1 + processedBytes += config.chunkSize.uint64 + + let now = epochTime() + if now - lastReport >= config.reportInterval: + let intervalBytes = processedBytes - lastBytes + let intervalRate = intervalBytes.float / (now - lastReport) + let overallRate = processedBytes.float / (now - startTime) + let progress = (blockCount.float / totalBlocks.float) * 100 + let eta = if overallRate > 0: (config.totalSize - processedBytes).float / overallRate else: 0.0 + echo &" Progress: {progress:.2f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s" + lastReport = now + lastBytes = processedBytes + + echo "" + echo "Finalizing tree..." + let finalizeStart = epochTime() + + let rootResult = builder.finalize() + if rootResult.isErr: + echo &"Finalize failed: {rootResult.error}" + return + + let root = rootResult.value + let finalizeEnd = epochTime() + + var treeFileSize: int64 = 0 + if config.backend == mbPacked: + let treePath = TreesDir / (treeId & ".tree") + treeFileSize = getFileSize(treePath) + + discard storage.close() + + let totalTime = finalizeEnd - startTime + let buildTime = finalizeStart - startTime + let finalizeTime = finalizeEnd - finalizeStart + let overallRate = processedBytes.float / totalTime + + echo "" + echo "=== Results ===" + echo &" Root hash: {toHexStr(root[0..7])}..." + echo &" Total blocks: {blockCount}" + echo &" Simulated data: {formatSize(processedBytes)}" + echo &" Build time: {buildTime:.2f}s" + echo &" Finalize time: {finalizeTime:.2f}s" + echo &" Total time: {totalTime:.2f}s" + echo &" Throughput: {formatRate(overallRate)}" + echo &" Blocks/sec: {blockCount.float / totalTime:.0f}" + if treeFileSize > 0: + echo &" Tree file size: {formatSize(treeFileSize.uint64)}" + echo &" Overhead: {treeFileSize.float / processedBytes.float * 100:.4f}%" + echo "" + + cleanup() + +proc runFullDatasetBenchmark(config: BenchConfig) {.async.} = + echo "=== Full Dataset Benchmark ===" + echo &"Simulated size: {formatSize(config.totalSize)}" + echo &"Chunk size: {config.chunkSize div 1024} KB" + echo &"Backend: {config.backend}" + echo "" + + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir, merkleBackend = config.backend) + if storeResult.isErr: + echo &"Failed to create store: {storeResult.error}" + return + + let store = storeResult.value + defer: store.close() + + let builderResult = store.startDataset(config.chunkSize.uint32, some("benchmark")) + if builderResult.isErr: + echo &"Failed to start dataset: {builderResult.error}" + return + + var builder = builderResult.value + + var chunk = newSeq[byte](config.chunkSize) + randomize() + for i in 0 ..< config.chunkSize: + chunk[i] = byte(rand(255)) + + echo "Ingesting blocks..." 
+ let startTime = epochTime() + var blockCount: uint64 = 0 + var processedBytes: uint64 = 0 + var lastReport = startTime + var lastBytes: uint64 = 0 + + while processedBytes < config.totalSize: + chunk[0] = byte(blockCount and 0xFF) + chunk[1] = byte((blockCount shr 8) and 0xFF) + chunk[2] = byte((blockCount shr 16) and 0xFF) + chunk[3] = byte((blockCount shr 24) and 0xFF) + + let blkResult = newBlock(chunk) + if blkResult.isErr: + echo &"Failed to create block: {blkResult.error}" + return + + let addResult = await builder.addBlock(blkResult.value) + if addResult.isErr: + echo &"Failed to add block: {addResult.error}" + return + + blockCount += 1 + processedBytes += config.chunkSize.uint64 + + let now = epochTime() + if now - lastReport >= config.reportInterval: + let intervalBytes = processedBytes - lastBytes + let intervalRate = intervalBytes.float / (now - lastReport) + let overallRate = processedBytes.float / (now - startTime) + let progress = (processedBytes.float / config.totalSize.float) * 100 + echo &" Progress: {progress:.1f}% | Blocks: {blockCount} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)})" + lastReport = now + lastBytes = processedBytes + + echo "" + echo "Finalizing dataset..." + let finalizeStart = epochTime() + + let datasetResult = await builder.finalize() + if datasetResult.isErr: + echo &"Failed to finalize: {datasetResult.error}" + return + + let dataset = datasetResult.value + let totalTime = epochTime() - startTime + let overallRate = processedBytes.float / totalTime + + echo "" + echo "=== Results ===" + echo &" Manifest CID: {dataset.manifestCid}" + echo &" Tree CID: {dataset.treeCid}" + echo &" Total blocks: {dataset.blockCount}" + echo &" Total time: {totalTime:.2f}s" + echo &" Throughput: {formatRate(overallRate)}" + echo &" Storage used: {formatSize(store.used())}" + echo "" + + cleanup() + +proc printUsage() = + echo "Usage: bench_merkle_streaming [options]" + echo "" + echo "Options:" + echo " --size= Dataset size (e.g., 1GB, 100GB, 1TB, 100TB)" + echo " --chunk= Chunk size in KB (default: 64)" + echo " --backend= Backend: packed, leveldb (default: packed)" + echo " --full Run full dataset benchmark (with block storage)" + echo " --help Show this help" + +proc parseSize(s: string): uint64 = + var num = s + var multiplier: uint64 = 1 + + if s.endsWith("TB") or s.endsWith("tb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 * 1024 * 1024 + elif s.endsWith("GB") or s.endsWith("gb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 * 1024 + elif s.endsWith("MB") or s.endsWith("mb"): + num = s[0..^3] + multiplier = 1024'u64 * 1024 + elif s.endsWith("KB") or s.endsWith("kb"): + num = s[0..^3] + multiplier = 1024'u64 + + try: + result = uint64(parseInt(num)) * multiplier + except ValueError: + result = 10'u64 * 1024 * 1024 * 1024 + +proc main() = + var config = BenchConfig( + totalSize: 10'u64 * 1024 * 1024 * 1024, + chunkSize: DefaultChunkSize, + backend: mbPacked, + storeBlocks: false, + reportInterval: 2.0 + ) + + var runFull = false + + for arg in commandLineParams(): + if arg.startsWith("--size="): + config.totalSize = parseSize(arg[7..^1]) + elif arg.startsWith("--chunk="): + config.chunkSize = parseInt(arg[8..^1]) * 1024 + elif arg.startsWith("--backend="): + let backend = arg[10..^1] + case backend + of "packed": config.backend = mbPacked + of "leveldb": config.backend = mbLevelDb + else: echo &"Unknown backend: {backend}"; return + elif arg == "--full": + runFull = true + elif arg == "--help": + printUsage() + return + + if runFull: 
+ waitFor runFullDatasetBenchmark(config) + else: + runMerkleOnlyBenchmark(config) + +when isMainModule: + main() diff --git a/tests/test_block.nim b/tests/test_block.nim new file mode 100644 index 0000000..7fa8265 --- /dev/null +++ b/tests/test_block.nim @@ -0,0 +1,90 @@ +import std/unittest +import results +import libp2p/multicodec +import ../blockstore/errors +import ../blockstore/cid +import ../blockstore/blocks + +suite "Block tests": + test "block creation": + let data = cast[seq[byte]]("hello world") + let blockResult = newBlock(data) + + check blockResult.isOk + let b = blockResult.value + check b.data == data + check b.size == data.len + + test "block verification": + let data = cast[seq[byte]]("hello world") + let blockResult = newBlock(data) + check blockResult.isOk + + let b = blockResult.value + let verifyResult = b.verify() + check verifyResult.isOk + check verifyResult.value == true + + test "block verification fails for corrupted data": + let data = cast[seq[byte]]("verify me") + let blockResult = newBlock(data) + check blockResult.isOk + + var b = blockResult.value + b.data[0] = b.data[0] xor 1 + + let verifyResult = b.verify() + check verifyResult.isOk + check verifyResult.value == false + + test "same data produces same CID": + let data = cast[seq[byte]]("same_cid") + let block1Result = newBlock(data) + let block2Result = newBlock(data) + + check block1Result.isOk + check block2Result.isOk + check block1Result.value.cid == block2Result.value.cid + + test "different data produces different CID": + let data1 = cast[seq[byte]]("data1") + let data2 = cast[seq[byte]]("data2") + let block1Result = newBlock(data1) + let block2Result = newBlock(data2) + + check block1Result.isOk + check block2Result.isOk + check block1Result.value.cid != block2Result.value.cid + + test "CID has correct codec and hash": + let data = cast[seq[byte]]("test data") + let cidResult = computeCid(data) + + check cidResult.isOk + let c = cidResult.value + check c.cidver == CIDv1 + check c.mcodec == LogosStorageBlock + + test "CID string roundtrip": + let data = cast[seq[byte]]("roundtrip test") + let blockResult = newBlock(data) + check blockResult.isOk + + let c = blockResult.value.cid + let cidStr = $c + let parsedResult = cidFromString(cidStr) + + check parsedResult.isOk + check parsedResult.value == c + + test "BlockMetadata creation": + let data = cast[seq[byte]]("metadata test") + let blockResult = newBlock(data) + check blockResult.isOk + + let b = blockResult.value + let meta = newBlockMetadata(b.cid, b.size, 42) + + check meta.size == b.size + check meta.index == 42 + check meta.cid == $b.cid diff --git a/tests/test_chunker.nim b/tests/test_chunker.nim new file mode 100644 index 0000000..1d87b8e --- /dev/null +++ b/tests/test_chunker.nim @@ -0,0 +1,262 @@ +import std/[unittest, os, options] +import chronos +import taskpools +import results +import ../blockstore/errors +import ../blockstore/blocks +import ../blockstore/chunker + +const testDir = getTempDir() / "nim_blockstore_test" + +suite "Chunker tests": + setup: + createDir(testDir) + + teardown: + removeDir(testDir) + + test "chunk small file": + let testFile = testDir / "small.txt" + let data = "hello world" + writeFile(testFile, data) + + let chunker = newSyncChunker() + let iterResult = chunker.chunkFile(testFile) + check iterResult.isOk + + var iter = iterResult.value + var blocks: seq[Block] = @[] + + while true: + let blockOpt = iter.nextBlock() + if blockOpt.isNone: + break + check blockOpt.get().isOk + 
blocks.add(blockOpt.get().value) + + iter.close() + + check blocks.len == 1 + check blocks[0].data == cast[seq[byte]](data) + + test "chunk exact chunk size": + let testFile = testDir / "exact.txt" + let chunkSize = 1024 + var data = newSeq[byte](chunkSize) + for i in 0 ..< chunkSize: + data[i] = 42'u8 + writeFile(testFile, cast[string](data)) + + let config = newChunkerConfig(chunkSize) + let chunker = newSyncChunker(config) + let iterResult = chunker.chunkFile(testFile) + check iterResult.isOk + + var iter = iterResult.value + var blocks: seq[Block] = @[] + + while true: + let blockOpt = iter.nextBlock() + if blockOpt.isNone: + break + check blockOpt.get().isOk + blocks.add(blockOpt.get().value) + + iter.close() + + check blocks.len == 1 + check blocks[0].data.len == chunkSize + + test "chunk multiple chunks": + let testFile = testDir / "multi.txt" + let chunkSize = 1024 + let totalSize = chunkSize * 2 + 512 + var data = newSeq[byte](totalSize) + for i in 0 ..< totalSize: + data[i] = 42'u8 + writeFile(testFile, cast[string](data)) + + let config = newChunkerConfig(chunkSize) + let chunker = newSyncChunker(config) + let iterResult = chunker.chunkFile(testFile) + check iterResult.isOk + + var iter = iterResult.value + var blocks: seq[Block] = @[] + + while true: + let blockOpt = iter.nextBlock() + if blockOpt.isNone: + break + check blockOpt.get().isOk + blocks.add(blockOpt.get().value) + + iter.close() + + check blocks.len == 3 + check blocks[0].data.len == chunkSize + check blocks[1].data.len == chunkSize + check blocks[2].data.len == 512 + + test "chunk empty file": + let testFile = testDir / "empty.txt" + writeFile(testFile, "") + + let chunker = newSyncChunker() + let iterResult = chunker.chunkFile(testFile) + check iterResult.isOk + + var iter = iterResult.value + let blockOpt = iter.nextBlock() + iter.close() + + check blockOpt.isNone + + test "unique block CIDs": + let testFile = testDir / "unique.txt" + writeFile(testFile, "aaaaaaaaaabbbbbbbbbb") + + let config = newChunkerConfig(10) + let chunker = newSyncChunker(config) + let iterResult = chunker.chunkFile(testFile) + check iterResult.isOk + + var iter = iterResult.value + var blocks: seq[Block] = @[] + + while true: + let blockOpt = iter.nextBlock() + if blockOpt.isNone: + break + check blockOpt.get().isOk + blocks.add(blockOpt.get().value) + + iter.close() + + check blocks.len == 2 + check blocks[0].cid != blocks[1].cid + + test "chunkData helper": + let data = cast[seq[byte]]("hello world, this is a test of chunking") + let chunkSize = 10 + let blocksResults = chunkData(data, chunkSize) + + check blocksResults.len == 4 + + for br in blocksResults: + check br.isOk + + check blocksResults[^1].value.data.len == 9 + + test "file not found error": + let chunker = newSyncChunker() + let iterResult = chunker.chunkFile("/nonexistent/file.txt") + + check iterResult.isErr + check iterResult.error.kind == IoError + +proc readBlocksAsync(pool: Taskpool, filePath: string): Future[seq[Block]] {.async.} = + let chunker = newAsyncChunker(pool) + let streamResult = await chunker.chunkFile(filePath) + doAssert streamResult.isOk + var stream = streamResult.value + result = @[] + while true: + let blockOpt = await stream.nextBlock() + if blockOpt.isNone: + break + doAssert blockOpt.get().isOk + result.add(blockOpt.get().value) + stream.close() + +proc readBlocksAsyncWithConfig(pool: Taskpool, filePath: string, chunkSize: int): Future[seq[Block]] {.async.} = + let config = newChunkerConfig(chunkSize) + let chunker = newAsyncChunker(pool, config) 
+ let streamResult = await chunker.chunkFile(filePath) + doAssert streamResult.isOk + var stream = streamResult.value + result = @[] + while true: + let blockOpt = await stream.nextBlock() + if blockOpt.isNone: + break + doAssert blockOpt.get().isOk + result.add(blockOpt.get().value) + stream.close() + +proc readTwoFilesAsync(pool: Taskpool, file1, file2: string): Future[(Block, Block)] {.async.} = + let chunker1 = newAsyncChunker(pool) + let chunker2 = newAsyncChunker(pool) + + let stream1Result = await chunker1.chunkFile(file1) + let stream2Result = await chunker2.chunkFile(file2) + doAssert stream1Result.isOk + doAssert stream2Result.isOk + + var stream1 = stream1Result.value + var stream2 = stream2Result.value + + let block1Opt = await stream1.nextBlock() + let block2Opt = await stream2.nextBlock() + doAssert block1Opt.isSome + doAssert block2Opt.isSome + doAssert block1Opt.get().isOk + doAssert block2Opt.get().isOk + + stream1.close() + stream2.close() + return (block1Opt.get().value, block2Opt.get().value) + +proc openNonexistentAsync(pool: Taskpool): Future[BResult[AsyncChunkStream]] {.async.} = + let chunker = newAsyncChunker(pool) + return await chunker.chunkFile("/nonexistent/async_file.txt") + +suite "Async Chunker tests": + var pool: Taskpool + + setup: + createDir(testDir) + pool = Taskpool.new(numThreads = 2) + + teardown: + pool.shutdown() + removeDir(testDir) + + test "async chunk small file": + let testFile = testDir / "async_small.txt" + let data = "hello async world" + writeFile(testFile, data) + + let blocks = waitFor readBlocksAsync(pool, testFile) + check blocks.len == 1 + check blocks[0].data == cast[seq[byte]](data) + + test "async chunk multiple chunks": + let testFile = testDir / "async_multi.txt" + let chunkSize = 1024 + let totalSize = chunkSize * 3 + 256 + var data = newSeq[byte](totalSize) + for i in 0 ..< totalSize: + data[i] = byte(i mod 256) + writeFile(testFile, cast[string](data)) + + let blocks = waitFor readBlocksAsyncWithConfig(pool, testFile, chunkSize) + check blocks.len == 4 + check blocks[0].data.len == chunkSize + check blocks[1].data.len == chunkSize + check blocks[2].data.len == chunkSize + check blocks[3].data.len == 256 + + test "async shared pool across chunkers": + let testFile1 = testDir / "shared1.txt" + let testFile2 = testDir / "shared2.txt" + writeFile(testFile1, "file one content") + writeFile(testFile2, "file two content") + + let (block1, block2) = waitFor readTwoFilesAsync(pool, testFile1, testFile2) + check block1.cid != block2.cid + + test "async file not found": + let streamResult = waitFor openNonexistentAsync(pool) + check streamResult.isErr + check streamResult.error.kind == IoError diff --git a/tests/test_dataset.nim b/tests/test_dataset.nim new file mode 100644 index 0000000..ef6a2a3 --- /dev/null +++ b/tests/test_dataset.nim @@ -0,0 +1,332 @@ +import std/[unittest, os, options] +import chronos +import results +import ../blockstore/errors +import ../blockstore/cid +import ../blockstore/blocks +import ../blockstore/dataset +import ../blockstore/blockmap + +const + TestDir = getTempDir() / "nim_blockstore_dataset_test" + DbPath = TestDir / "db" + BlocksDir = TestDir / "blocks" + +proc cleanup() = + if dirExists(TestDir): + removeDir(TestDir) + +proc createTestDataset(store: DatasetStore, blockCount: int, chunkSize: int = 4096): Future[BResult[Dataset]] {.async.} = + let builderResult = store.startDataset(chunkSize.uint32, some("test")) + if builderResult.isErr: + return err(builderResult.error) + + var builder = 
builderResult.value + + for i in 0 ..< blockCount: + var data = newSeq[byte](chunkSize) + for j in 0 ..< chunkSize: + data[j] = byte((i * chunkSize + j) mod 256) + + let blkResult = newBlock(data) + if blkResult.isErr: + return err(blkResult.error) + + let addResult = await builder.addBlock(blkResult.value) + if addResult.isErr: + return err(addResult.error) + + return await builder.finalize() + +proc runDeleteExistingDataset() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir) + doAssert storeResult.isOk, "Failed to create store: " & $storeResult.error + let store = storeResult.value + defer: store.close() + + let datasetResult = await createTestDataset(store, 5) + doAssert datasetResult.isOk, "Failed to create dataset: " & $datasetResult.error + let dataset = datasetResult.value + + let manifestCid = dataset.manifestCid + + let getResult1 = await store.getDataset(dataset.treeCid) + doAssert getResult1.isOk + doAssert getResult1.value.isSome, "Dataset should exist before deletion" + + let deleteResult = await store.deleteDataset(manifestCid) + doAssert deleteResult.isOk, "Delete should succeed: " & $deleteResult.error + + let getResult2 = await store.getDataset(dataset.treeCid) + doAssert getResult2.isOk + doAssert getResult2.value.isNone, "Dataset should not exist after deletion" + + cleanup() + +proc runDeleteNonExistentDataset() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let fakeCidResult = cidFromString("bagazuayseaka5yn4pfmebc7bqkkoij6wb5x3o4jlvzq7flqhd63qalnrskwvy") + doAssert fakeCidResult.isOk + let fakeCid = fakeCidResult.value + + let deleteResult = await store.deleteDataset(fakeCid) + doAssert deleteResult.isErr, "Delete should fail for non-existent dataset" + doAssert deleteResult.error.kind == DatasetNotFound + + cleanup() + +proc runStorageReleasedAfterDeletion() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let usedBefore = store.used() + + let datasetResult = await createTestDataset(store, 10, 4096) + doAssert datasetResult.isOk + let dataset = datasetResult.value + + let usedAfterCreate = store.used() + doAssert usedAfterCreate > usedBefore, "Storage should increase after adding dataset" + + let deleteResult = await store.deleteDataset(dataset.manifestCid) + doAssert deleteResult.isOk + + # Wait for pending deletions to be processed by the worker - hoping that + # 500milli will do the job + for _ in 0 ..< 10: + await sleepAsync(50.milliseconds) + if store.used() < usedAfterCreate: + break + + let usedAfterDelete = store.used() + doAssert usedAfterDelete < usedAfterCreate, "Storage should decrease after deletion" + + cleanup() + +proc runMultipleDatasetsDeletion() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let dataset1Result = await createTestDataset(store, 3, 4096) + doAssert dataset1Result.isOk + let dataset1 = dataset1Result.value + + let dataset2Result = await createTestDataset(store, 4, 4096) + doAssert dataset2Result.isOk + let dataset2 = dataset2Result.value + + let get1Before = await 
store.getDataset(dataset1.treeCid) + let get2Before = await store.getDataset(dataset2.treeCid) + doAssert get1Before.isOk and get1Before.value.isSome + doAssert get2Before.isOk and get2Before.value.isSome + + let delete1Result = await store.deleteDataset(dataset1.manifestCid) + doAssert delete1Result.isOk + + let get1After = await store.getDataset(dataset1.treeCid) + let get2After = await store.getDataset(dataset2.treeCid) + doAssert get1After.isOk and get1After.value.isNone, "Dataset 1 should be deleted" + doAssert get2After.isOk and get2After.value.isSome, "Dataset 2 should still exist" + + let delete2Result = await store.deleteDataset(dataset2.manifestCid) + doAssert delete2Result.isOk + + let get2Final = await store.getDataset(dataset2.treeCid) + doAssert get2Final.isOk and get2Final.value.isNone, "Dataset 2 should be deleted" + + cleanup() + +proc runDeleteDatasetWithManyBlocks() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let datasetResult = await createTestDataset(store, 100, 4096) + doAssert datasetResult.isOk + let dataset = datasetResult.value + + doAssert dataset.blockCount == 100 + + let deleteResult = await store.deleteDataset(dataset.manifestCid) + doAssert deleteResult.isOk, "Delete should succeed for dataset with many blocks" + + let getResult = await store.getDataset(dataset.treeCid) + doAssert getResult.isOk and getResult.value.isNone + + cleanup() + +proc runMappedBlockmapBasic() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile) + doAssert storeResult.isOk, "Failed to create store with mapped blockmap: " & $storeResult.error + let store = storeResult.value + defer: store.close() + + let datasetResult = await createTestDataset(store, 10) + doAssert datasetResult.isOk, "Failed to create dataset with mapped blockmap: " & $datasetResult.error + let dataset = datasetResult.value + + doAssert dataset.blockCount == 10 + doAssert dataset.completed() == 10 + + for i in 0 ..< 10: + let blockResult = await dataset.getBlock(i) + doAssert blockResult.isOk + doAssert blockResult.value.isSome + + cleanup() + +proc runMappedBlockmapRanges() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let datasetResult = await createTestDataset(store, 20) + doAssert datasetResult.isOk + let dataset = datasetResult.value + + let ranges = dataset.getBlockmapRanges() + doAssert ranges.len >= 1, "Expected at least one range" + + var totalBlocks: uint64 = 0 + for r in ranges: + totalBlocks += r.count + doAssert totalBlocks == 20, "Expected 20 blocks in ranges" + + cleanup() + +proc runMappedBlockmapPersistence() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + var treeCid: Cid + block: + let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile) + doAssert storeResult.isOk + let store = storeResult.value + + let datasetResult = await createTestDataset(store, 15) + doAssert datasetResult.isOk + treeCid = datasetResult.value.treeCid + + store.close() + + block: + let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile) + doAssert storeResult.isOk + let store = storeResult.value + defer: 
store.close() + + let getResult = await store.getDataset(treeCid) + doAssert getResult.isOk + doAssert getResult.value.isSome, "Dataset should persist after reopen" + + let dataset = getResult.value.get() + doAssert dataset.blockCount == 15 + doAssert dataset.completed() == 15 + + cleanup() + +proc runMappedBlockmapDeletion() {.async.} = + cleanup() + createDir(TestDir) + createDir(BlocksDir) + + let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile) + doAssert storeResult.isOk + let store = storeResult.value + defer: store.close() + + let datasetResult = await createTestDataset(store, 5) + doAssert datasetResult.isOk + let dataset = datasetResult.value + + let manifestCid = dataset.manifestCid + + let deleteResult = await store.deleteDataset(manifestCid) + doAssert deleteResult.isOk + + let getResult = await store.getDataset(dataset.treeCid) + doAssert getResult.isOk + doAssert getResult.value.isNone, "Dataset should not exist after deletion" + + cleanup() + +suite "Dataset deletion tests": + setup: + cleanup() + + teardown: + cleanup() + + test "delete existing dataset": + waitFor runDeleteExistingDataset() + + test "delete non-existent dataset returns error": + waitFor runDeleteNonExistentDataset() + + test "storage released after deletion": + waitFor runStorageReleasedAfterDeletion() + + test "delete one dataset doesn't affect others": + waitFor runMultipleDatasetsDeletion() + + test "delete dataset with many blocks": + waitFor runDeleteDatasetWithManyBlocks() + +suite "Mapped blockmap backend tests": + setup: + cleanup() + + teardown: + cleanup() + + test "basic dataset operations with mapped blockmap": + waitFor runMappedBlockmapBasic() + + test "blockmap ranges work with mapped backend": + waitFor runMappedBlockmapRanges() + + test "mapped blockmap persists across reopens": + waitFor runMappedBlockmapPersistence() + + test "mapped blockmap files deleted with dataset": + waitFor runMappedBlockmapDeletion() diff --git a/tests/test_merkle.nim b/tests/test_merkle.nim new file mode 100644 index 0000000..9a3868d --- /dev/null +++ b/tests/test_merkle.nim @@ -0,0 +1,745 @@ +import std/[unittest, os, options, sets, syncio, strutils] +import results +import leveldbstatic as leveldb +import libp2p/multicodec +import ../blockstore/errors +import ../blockstore/cid +import ../blockstore/merkle +import ../blockstore/sha256 + +const + TestDbPath = "/tmp/test_merkle_db" + TestPackedPath = "/tmp/test_merkle_packed.tree" + +proc cleanup() = + if dirExists(TestDbPath): + removeDir(TestDbPath) + if fileExists(TestPackedPath): + removeFile(TestPackedPath) + +suite "MerkleTreeBuilder tests": + test "tree builder basic": + var builder = newMerkleTreeBuilder() + + builder.addBlock(cast[seq[byte]]("block1")) + builder.addBlock(cast[seq[byte]]("block2")) + builder.addBlock(cast[seq[byte]]("block3")) + + check builder.blockCount == 3 + + builder.buildTree() + let rootCidResult = builder.rootCid() + check rootCidResult.isOk + + let rootCid = rootCidResult.value + check rootCid.cidver == CIDv1 + check rootCid.mcodec == LogosStorageTree + + test "single block proof": + var builder = newMerkleTreeBuilder() + let blockData = cast[seq[byte]]("hello world") + builder.addBlock(blockData) + builder.buildTree() + + let rootCidResult = builder.rootCid() + check rootCidResult.isOk + let rootCid = rootCidResult.value + + let proofResult = builder.getProof(0) + check proofResult.isOk + let proof = proofResult.value + + let mhResult = rootCid.mhash() + check mhResult.isOk + let mh = mhResult.get() + 
let rootBytes = mh.data.buffer[mh.dpos .. mh.dpos + mh.size - 1] + let verifyResult = proof.verify(rootBytes, blockData) + check verifyResult.isOk + check verifyResult.value == true + + test "proof fails for wrong data": + var builder = newMerkleTreeBuilder() + let blockData = cast[seq[byte]]("hello world") + builder.addBlock(blockData) + builder.buildTree() + + let rootCidResult = builder.rootCid() + check rootCidResult.isOk + let rootCid = rootCidResult.value + + let proofResult = builder.getProof(0) + check proofResult.isOk + let proof = proofResult.value + + let wrongData = cast[seq[byte]]("wrong data") + let mhResult = rootCid.mhash() + check mhResult.isOk + let mh = mhResult.get() + let rootBytes = mh.data.buffer[mh.dpos .. mh.dpos + mh.size - 1] + let verifyResult = proof.verify(rootBytes, wrongData) + check verifyResult.isOk + check verifyResult.value == false + + test "deterministic root": + var builder1 = newMerkleTreeBuilder() + builder1.addBlock(cast[seq[byte]]("a")) + builder1.addBlock(cast[seq[byte]]("b")) + builder1.buildTree() + + var builder2 = newMerkleTreeBuilder() + builder2.addBlock(cast[seq[byte]]("a")) + builder2.addBlock(cast[seq[byte]]("b")) + builder2.buildTree() + + check builder1.rootCid().value == builder2.rootCid().value + + test "proof structure for 4-leaf tree": + var builder = newMerkleTreeBuilder() + builder.addBlock(cast[seq[byte]]("a")) + builder.addBlock(cast[seq[byte]]("b")) + builder.addBlock(cast[seq[byte]]("c")) + builder.addBlock(cast[seq[byte]]("d")) + builder.buildTree() + + let proof = builder.getProof(1).value + + check proof.index == 1 + check proof.leafCount == 4 + check proof.path.len == 2 + +suite "Streaming Merkle Storage tests": + setup: + cleanup() + + teardown: + cleanup() + + test "computeNumLevels": + check computeNumLevels(0) == 0 + check computeNumLevels(1) == 1 + check computeNumLevels(2) == 2 + check computeNumLevels(3) == 3 + check computeNumLevels(4) == 3 + check computeNumLevels(5) == 4 + check computeNumLevels(8) == 4 + check computeNumLevels(16) == 5 + + test "nodesAtLevel": + check nodesAtLevel(4, 0) == 4 + check nodesAtLevel(4, 1) == 2 + check nodesAtLevel(4, 2) == 1 + + check nodesAtLevel(5, 0) == 5 + check nodesAtLevel(5, 1) == 3 + check nodesAtLevel(5, 2) == 2 + check nodesAtLevel(5, 3) == 1 + + test "computeNumLevels edge cases": + check computeNumLevels(1'u64 shl 20) == 21 # 1M leaves -> 21 levels + check computeNumLevels(1'u64 shl 30) == 31 # 1B leaves -> 31 levels + check computeNumLevels(1'u64 shl 40) == 41 # 1T leaves -> 41 levels + check computeNumLevels(1'u64 shl 50) == 51 + check computeNumLevels(1'u64 shl 60) == 61 + check computeNumLevels(1'u64 shl 63) == 64 # 2^63 leaves -> 64 levels + + check computeNumLevels((1'u64 shl 20) + 1) == 22 + check computeNumLevels((1'u64 shl 30) + 1) == 32 + check computeNumLevels((1'u64 shl 63) + 1) == 65 + + check computeNumLevels(high(uint64)) == 65 # 2^64 - 1 -> 65 levels + + test "nodesAtLevel edge cases": + let bigLeafCount = 1'u64 shl 40 # 1 trillion leaves + check nodesAtLevel(bigLeafCount, 0) == bigLeafCount + check nodesAtLevel(bigLeafCount, 10) == 1'u64 shl 30 + check nodesAtLevel(bigLeafCount, 20) == 1'u64 shl 20 + check nodesAtLevel(bigLeafCount, 40) == 1 # root level + + let oddLeafCount = (1'u64 shl 40) + 7 + check nodesAtLevel(oddLeafCount, 0) == oddLeafCount + check nodesAtLevel(oddLeafCount, 1) == (oddLeafCount + 1) shr 1 + check nodesAtLevel(oddLeafCount, 40) == 2 + check nodesAtLevel(oddLeafCount, 41) == 1 + + let maxLeaves = high(uint64) + check 
nodesAtLevel(maxLeaves, 0) == maxLeaves + check nodesAtLevel(maxLeaves, 63) == 2 + check nodesAtLevel(maxLeaves, 64) == 1 # root + + check nodesAtLevel(3, 1) == 2 + check nodesAtLevel(7, 2) == 2 + check nodesAtLevel(9, 3) == 2 + check nodesAtLevel(17, 4) == 2 + + test "hashConcat deterministic": + var h1, h2: MerkleHash + for i in 0 ..< 32: + h1[i] = byte(i) + h2[i] = byte(i + 32) + + let result1 = hashConcat(h1, h2) + let result2 = hashConcat(h1, h2) + check result1 == result2 + + let result3 = hashConcat(h2, h1) + check result1 != result3 + + test "LevelDB streaming builder - single leaf": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "tree1") + var builder = newStreamingMerkleBuilder(storage) + + let leafHash = sha256Hash(cast[seq[byte]]("block0")) + discard builder.addLeaf(leafHash) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + check root == leafHash + check builder.leafCount == 1 + + test "LevelDB streaming builder - two leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "tree2") + var builder = newStreamingMerkleBuilder(storage) + + let h0 = sha256Hash(cast[seq[byte]]("block0")) + let h1 = sha256Hash(cast[seq[byte]]("block1")) + discard builder.addLeaf(h0) + discard builder.addLeaf(h1) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let expected = hashConcat(h0, h1) + check root == expected + + test "LevelDB streaming builder - four leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "tree4") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 4: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let left = hashConcat(hashes[0], hashes[1]) + let right = hashConcat(hashes[2], hashes[3]) + let expected = hashConcat(left, right) + check root == expected + + test "LevelDB reader and proof generation": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "treeProof") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 4: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + check reader.leafCount == 4 + + let rootOpt = reader.root() + check rootOpt.isSome + check rootOpt.get() == root + + let proofResult = reader.getProof(1) + check proofResult.isOk + let proof = proofResult.value + + check proof.index == 1 + check proof.leafCount == 4 + check proof.path.len == 2 + + check verify(proof, root, hashes[1]) + check not verify(proof, root, hashes[0]) + + test "LevelDB proof for all leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "treeAllProofs") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 8: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + + for i in 0 ..< 8: + 
let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Packed storage - basic write and read": + let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get() + defer: discard storage.close() + + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 4: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let (leafCount, numLevels) = storage.getMetadata() + check leafCount == 4 + check numLevels == 3 + + test "Packed storage - read after close": + block: + let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get() + var builder = newStreamingMerkleBuilder(storage) + + for i in 0 ..< 4: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + discard builder.addLeaf(h) + + discard builder.finalize() + discard storage.close() + + block: + let storage = newPackedMerkleStorage(TestPackedPath).get() + defer: discard storage.close() + + let (leafCount, numLevels) = storage.getMetadata() + check leafCount == 4 + check numLevels == 3 + + let reader = newMerkleReader(storage) + check reader.leafCount == 4 + + let rootOpt = reader.root() + check rootOpt.isSome + + test "Packed storage - proof verification": + var hashes: seq[MerkleHash] + var root: MerkleHash + + block: + let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get() + var builder = newStreamingMerkleBuilder(storage) + + for i in 0 ..< 8: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + root = rootResult.value + discard storage.close() + + block: + let storage = newPackedMerkleStorage(TestPackedPath).get() + defer: discard storage.close() + + let reader = newMerkleReader(storage) + + for i in 0 ..< 8: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Non-power-of-two leaves - 5 leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "tree5") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 5: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + check reader.leafCount == 5 + + for i in 0 ..< 5: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Non-power-of-two leaves - 7 leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "tree7") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< 7: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + + for i in 0 ..< 7: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Large tree - 1000 leaves": 
+ let db = leveldb.open(TestDbPath) + defer: db.close() + + const numLeaves = 1000 + let storage = newLevelDbMerkleStorage(db, "tree1000") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< numLeaves: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + check reader.leafCount == numLeaves + + let testIndices = @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, + 100, 250, 500, 750, 333, 666, 512, 511, 513] + for i in testIndices: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Large tree - 997 leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + const numLeaves = 997 + let storage = newLevelDbMerkleStorage(db, "tree997") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< numLeaves: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + check reader.leafCount == numLeaves + + for i in 0 ..< numLeaves: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + if not verify(proof, root, hashes[i]): + echo "Proof verification failed for leaf ", i + check false + + test "Large tree - 1024 leaves": + let db = leveldb.open(TestDbPath) + defer: db.close() + + const numLeaves = 1024 + let storage = newLevelDbMerkleStorage(db, "tree1024") + var builder = newStreamingMerkleBuilder(storage) + + var hashes: seq[MerkleHash] + for i in 0 ..< numLeaves: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let reader = newMerkleReader(storage) + check reader.leafCount == numLeaves + + let testIndices = @[0, 1, 2, 511, 512, 513, 1022, 1023, + 256, 768, 128, 384, 640, 896] + for i in testIndices: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + check verify(proof, root, hashes[i]) + + test "Large packed storage - 500000 leaves": + const numLeaves = 500000 + var hashes: seq[MerkleHash] + var root: MerkleHash + + block: + let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get() + var builder = newStreamingMerkleBuilder(storage) + + for i in 0 ..< numLeaves: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + hashes.add(h) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + root = rootResult.value + discard storage.close() + + block: + let storage = newPackedMerkleStorage(TestPackedPath).get() + defer: discard storage.close() + + let reader = newMerkleReader(storage) + check reader.leafCount == numLeaves + + for i in 0 ..< numLeaves: + let proofResult = reader.getProof(uint64(i)) + check proofResult.isOk + let proof = proofResult.value + if not verify(proof, root, hashes[i]): + echo "Packed proof verification failed for leaf ", i + doAssert false + + test "Empty tree finalize fails": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "emptyTree") + var 
builder = newStreamingMerkleBuilder(storage) + + let rootResult = builder.finalize() + check rootResult.isErr + + test "Invalid proof index": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "treeInvalid") + var builder = newStreamingMerkleBuilder(storage) + + for i in 0 ..< 4: + let h = sha256Hash(cast[seq[byte]]("block" & $i)) + discard builder.addLeaf(h) + + discard builder.finalize() + + let reader = newMerkleReader(storage) + let proofResult = reader.getProof(10) + check proofResult.isErr + +suite "PackedMerkleStorage error cases": + setup: + cleanup() + + teardown: + cleanup() + + test "Invalid magic in packed file": + # Here create a file with wrong magic bytes + let f = syncio.open(TestPackedPath, fmWrite) + var wrongMagic: uint32 = 0xDEADBEEF'u32 + var version: uint32 = 2 + var leafCount: uint64 = 0 + var numLevels: int32 = 0 + discard f.writeBuffer(addr wrongMagic, 4) + discard f.writeBuffer(addr version, 4) + discard f.writeBuffer(addr leafCount, 8) + discard f.writeBuffer(addr numLevels, 4) + f.close() + + let res = newPackedMerkleStorage(TestPackedPath, forWriting = false) + check res.isErr + check res.error.msg == "Invalid packed merkle file magic" + + test "Unsupported version in packed file": + # Now create a file with correct magic but wrong version + let f = syncio.open(TestPackedPath, fmWrite) + var magic: uint32 = 0x534B4C4D'u32 # PackedMagic + var wrongVersion: uint32 = 99 + var leafCount: uint64 = 0 + var numLevels: int32 = 0 + discard f.writeBuffer(addr magic, 4) + discard f.writeBuffer(addr wrongVersion, 4) + discard f.writeBuffer(addr leafCount, 8) + discard f.writeBuffer(addr numLevels, 4) + f.close() + + let res = newPackedMerkleStorage(TestPackedPath, forWriting = false) + check res.isErr + check "Unsupported packed merkle file version" in res.error.msg + + test "File too small for header": + # And now create a file that's too small + let f = syncio.open(TestPackedPath, fmWrite) + var magic: uint32 = 0x534B4C4D'u32 + discard f.writeBuffer(addr magic, 4) + f.close() + + let res = newPackedMerkleStorage(TestPackedPath, forWriting = false) + check res.isErr + check res.error.msg == "File too small for header" + +suite "MerkleTreeBuilder edge cases": + test "root() returns none when not built": + var builder = newMerkleTreeBuilder() + builder.addBlock(cast[seq[byte]]("block1")) + let rootOpt = builder.root() + check rootOpt.isNone + + test "rootCid() fails when not built": + var builder = newMerkleTreeBuilder() + builder.addBlock(cast[seq[byte]]("block1")) + let cidResult = builder.rootCid() + check cidResult.isErr + check "Tree not built" in cidResult.error.msg + + test "getProof() fails when not built": + var builder = newMerkleTreeBuilder() + builder.addBlock(cast[seq[byte]]("block1")) + let proofResult = builder.getProof(0) + check proofResult.isErr + check "Tree not built" in proofResult.error.msg + + test "addBlock after buildTree raises Defect": + var builder = newMerkleTreeBuilder() + builder.addBlock(cast[seq[byte]]("block1")) + builder.buildTree() + + var raised = false + try: + builder.addBlock(cast[seq[byte]]("block2")) + except Defect: + raised = true + check raised + + test "buildTree on empty builder does nothing": + var builder = newMerkleTreeBuilder() + builder.buildTree() + let rootOpt = builder.root() + check rootOpt.isNone + +suite "Proof verification edge cases": + test "verify() with wrong root length returns error": + var builder = newMerkleTreeBuilder() + let blockData = 
cast[seq[byte]]("hello world") + builder.addBlock(blockData) + builder.buildTree() + + let proofResult = builder.getProof(0) + check proofResult.isOk + let proof = proofResult.value + + let + shortRoot: array[16, byte] = default(array[16, byte]) + verifyResult = proof.verify(shortRoot, blockData) + check verifyResult.isErr + +suite "rootToCid function": + setup: + cleanup() + + teardown: + cleanup() + + test "rootToCid converts hash to valid CID": + let db = leveldb.open(TestDbPath) + defer: db.close() + + let storage = newLevelDbMerkleStorage(db, "rootToCidTest") + var builder = newStreamingMerkleBuilder(storage) + + let h = sha256Hash(cast[seq[byte]]("block0")) + discard builder.addLeaf(h) + + let rootResult = builder.finalize() + check rootResult.isOk + let root = rootResult.value + + let cidResult = rootToCid(root) + check cidResult.isOk + let cid = cidResult.value + check cid.cidver == CIDv1 + check cid.mcodec == LogosStorageTree + +suite "getRequiredLeafIndices function": + test "single leaf at start": + let res = getRequiredLeafIndices(0, 1, 4) + check 1 in res + + test "single leaf in middle": + let res = getRequiredLeafIndices(2, 1, 4) + check 3 in res + + test "consecutive pair - no extra leaves needed at first level": + let res = getRequiredLeafIndices(0, 2, 4) + check 2 in res + check 3 in res + + test "full range - no extra leaves needed": + let res = getRequiredLeafIndices(0, 4, 4) + check res.len == 0 + + test "larger tree - partial range": + let res = getRequiredLeafIndices(0, 3, 8) + check 3 in res + check 4 in res + check 5 in res + check 6 in res + check 7 in res + + test "non-power-of-two total leaves": + let res = getRequiredLeafIndices(0, 2, 5) + check 2 in res + check 3 in res + check 4 in res