Mirror of https://github.com/logos-storage/nim-blockstore.git (synced 2026-01-10 11:53:09 +00:00)

Commit 7b23545c27: initial commit
Signed-off-by: Chrysostomos Nanakos <chris@include.gr>

LICENSE-APACHEv2 (new file, 201 lines)
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2018 Status Research & Development GmbH
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
LICENSE-MIT (new file, 21 lines)

The MIT License (MIT)

Copyright (c) 2022 Status Research & Development GmbH

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md (new file, 188 lines)

# nim-blockstore

[License: MIT](https://opensource.org/licenses/MIT) · [Stability: experimental](#stability) · [Nim](https://nim-lang.org/)

A content-addressed block storage library for Nim with configurable hash algorithms, codecs, and merkle tree proofs for block verification.

## Features

- Content-addressed storage with CIDv1 identifiers
- Configurable hash functions and codecs via `BlockHashConfig`
- Merkle tree proofs for block verification
- Multiple storage backends:
  - Block storage: sharded files (`bbSharded`) or packed files (`bbPacked`)
  - Merkle tree storage: embedded proofs (`mbEmbeddedProofs`), LevelDB (`mbLevelDb`), or packed files (`mbPacked`)
  - Blockmap storage: LevelDB (`bmLevelDb`) or files (`bmFile`)
- Direct I/O support (`ioDirect`) for crash consistency and OS cache bypass
- Buffered I/O mode (`ioBuffered`) with configurable batch sync
- Storage quota management
- Background garbage collection with deletion worker
- Metadata stored in LevelDB
- Async file chunking
- Dataset management with manifests

## Usage

### Building a Dataset from a File

```nim
import blockstore
import taskpools
import std/options

proc buildDataset() {.async.} =
  # Create a shared thread pool for async file I/O
  let pool = Taskpool.new(numThreads = 4)
  defer: pool.shutdown()

  let store = newDatasetStore("./db", "./blocks").get()

  # Start building a dataset with 64KB chunks
  let builder = store.startDataset(64 * 1024, some("myfile.txt")).get()

  # Chunk the file
  let stream = (await builder.chunkFile(pool)).get()

  while true:
    let blockOpt = await stream.nextBlock()
    if blockOpt.isNone:
      break
    let blockResult = blockOpt.get()
    if blockResult.isErr:
      echo "Error: ", blockResult.error
      break
    discard await builder.addBlock(blockResult.value)

  stream.close()

  # Finalize and get the dataset
  let dataset = (await builder.finalize()).get()
  echo "Tree CID: ", dataset.treeCid
  echo "Manifest CID: ", dataset.manifestCid

waitFor buildDataset()
```
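
The examples use `.get()` for brevity, which raises if an operation fails. Every call returns a `BResult`, so errors can also be handled explicitly; a minimal sketch of that style (same API as above, hypothetical paths):

```nim
import blockstore

proc openStore(): DatasetStore =
  # Branch on the result instead of calling .get() blindly.
  let storeRes = newDatasetStore("./db", "./blocks")
  if storeRes.isErr:
    echo "Failed to open store: ", storeRes.error
    quit(1)
  storeRes.get()
```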

### Retrieving Blocks with Proofs

```nim
import blockstore

proc getBlockWithProof(store: DatasetStore, treeCid: Cid, index: int) {.async.} =
  let datasetOpt = (await store.getDataset(treeCid)).get()
  if datasetOpt.isNone:
    echo "Dataset not found"
    return

  let dataset = datasetOpt.get()
  let blockOpt = (await dataset.getBlock(index)).get()

  if blockOpt.isSome:
    let (blk, proof) = blockOpt.get()
    echo "Block: ", blk
    echo "Proof index: ", proof.index
    echo "Proof path length: ", proof.path.len
```
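
Because every block carries its CID, a retrieved block can be re-hashed and compared against it. A minimal sketch using the library's `verify` helper (which assumes the blocks were created with the default hash configuration):

```nim
import blockstore

proc checkBlock(blk: Block): bool =
  # verify() recomputes the block's CID and compares it to the stored one.
  let res = blk.verify()
  res.isOk and res.get()
```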

## API Reference

### newDatasetStore

Creates a new dataset store with configurable backends and I/O modes:

```nim
proc newDatasetStore*(
  dbPath: string,                               # Path to LevelDB database
  blocksDir: string,                            # Directory for block storage
  quota: uint64 = 0,                            # Storage quota (0 = unlimited)
  blockHashConfig: BlockHashConfig = defaultBlockHashConfig(),
  merkleBackend: MerkleBackend = mbPacked,      # Merkle tree storage backend
  blockBackend: BlockBackend = bbSharded,       # Block storage backend
  blockmapBackend: BlockmapBackend = bmLevelDb, # Blockmap storage backend
  ioMode: IOMode = ioDirect,                    # I/O mode
  syncBatchSize: int = 0,                       # Batch size for sync (buffered mode)
  pool: Taskpool = nil                          # Thread pool for deletion worker
): BResult[DatasetStore]
```
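
Only `dbPath` and `blocksDir` are required; the remaining parameters select backends and I/O behaviour. An illustrative configuration (hypothetical paths and quota) that packs blocks and tree nodes into per-dataset files and enforces a 10 GiB quota:

```nim
import blockstore

let store = newDatasetStore(
  dbPath = "./db",
  blocksDir = "./blocks",
  quota = 10'u64 * 1024 * 1024 * 1024,  # 10 GiB
  merkleBackend = mbPacked,
  blockBackend = bbPacked,
  blockmapBackend = bmFile
).get()
```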

### Storage Backends

#### BlockBackend

| Value | Description |
|-------|-------------|
| `bbSharded` | Sharded directory structure (default). One file per block with 2-level sharding. |
| `bbPacked` | Packed file format. All blocks for a dataset in a single file. |

#### MerkleBackend

Controls how merkle proofs are stored and generated.

| Value | Description |
|-------|-------------|
| `mbEmbeddedProofs` | Proofs computed during build (tree in memory) and embedded in block references in LevelDB. Tree discarded after finalize. Good for smaller datasets. |
| `mbLevelDb` | Tree nodes stored in LevelDB. Proofs generated on-demand from stored tree. |
| `mbPacked` | Tree nodes in packed files (default). One file per tree. Proofs generated on-demand. Efficient for large datasets. |

#### BlockmapBackend

| Value | Description |
|-------|-------------|
| `bmLevelDb` | LevelDB storage (default). Shared with metadata. |
| `bmFile` | File-based storage. One file per blockmap. |

### I/O Modes

| Value | Description |
|-------|-------------|
| `ioDirect` | Direct I/O (default). Bypasses OS cache, data written directly to disk. Provides crash consistency. |
| `ioBuffered` | Buffered I/O. Uses OS cache. Use `syncBatchSize` to control sync frequency if needed. |
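
For bulk ingestion where strict crash consistency is not required, buffered I/O with batched syncing can reduce write overhead. A sketch using the parameters above (the batch value is illustrative; its exact effect is governed by `syncBatchSize`):

```nim
let store = newDatasetStore(
  "./db", "./blocks",
  ioMode = ioBuffered,
  syncBatchSize = 256
).get()
```
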
### BlockHashConfig

Configuration for block hashing and CID generation:

| Field | Type | Description |
|-------|------|-------------|
| `hashFunc` | `HashFunc` | Hash function `proc(data: openArray[byte]): HashDigest` |
| `hashCode` | `MultiCodec` | Multicodec identifier for the hash (e.g., `Sha256Code`) |
| `blockCodec` | `MultiCodec` | Codec for blocks (e.g., `LogosStorageBlock`) |
| `treeCodec` | `MultiCodec` | Codec for merkle tree CIDs (e.g., `LogosStorageTree`) |
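
The default configuration wires these fields to SHA-256 and the logos-storage codecs. Spelling that out explicitly (a sketch equivalent to `defaultBlockHashConfig()`) shows where a custom hash function would plug in:

```nim
import blockstore

let config = BlockHashConfig(
  hashFunc: sha256HashFunc,   # proc(data: openArray[byte]): HashDigest
  hashCode: Sha256Code,
  blockCodec: LogosStorageBlock,
  treeCodec: LogosStorageTree
)

# Blocks and CIDs computed with this config match the library defaults.
let blk = newBlock(@[byte 1, 2, 3], config).get()
echo blk.cid
```
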
## Running Tests

```bash
nimble test
```

## Code Coverage

Generate HTML coverage reports:

```bash
# All tests
nimble coverage

# Individual test suites
nimble coverage_merkle
nimble coverage_block
nimble coverage_chunker

# Clean coverage data
nimble coverage_clean
```

## Stability

This library is experimental and may introduce breaking changes between versions until it stabilizes.

## License

nim-blockstore is licensed and distributed under either of:

* Apache License, Version 2.0: [LICENSE-APACHEv2](LICENSE-APACHEv2) or https://opensource.org/licenses/Apache-2.0
* MIT license: [LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT

at your option. The contents of this repository may not be copied, modified, or distributed except according to those terms.
blockstore.nim (new file, 2 lines)

import blockstore/blockstore
export blockstore
blockstore.nimble (new file, 116 lines)

# Package

version       = "0.1.0"
author        = "Status Research & Development GmbH"
description   = "Nim blockstore"
license       = "Apache License 2.0 or MIT"
srcDir        = "blockstore"

requires "nim >= 2.2.4"
requires "nimcrypto >= 0.6.0"
requires "leveldbstatic >= 0.1.0"
requires "results >= 0.4.0"
requires "chronos >= 4.0.0"
requires "libp2p >= 1.14.1 & < 2.0.0"
requires "constantine >= 0.2.0"
requires "taskpools >= 0.0.5"
requires "hashlib >= 1.0.0"

task test, "Run the test suite":
  exec "nim c -r tests/test_block.nim"
  exec "nim c -r tests/test_merkle.nim"
  exec "nim c -r tests/test_chunker.nim"
  exec "nim c -r tests/test_dataset.nim"

task test_constantine, "Run the test suite with Constantine":
  exec "nim c -d:useConstantine -r tests/test_block.nim"
  exec "nim c -d:useConstantine -r tests/test_merkle.nim"
  exec "nim c -d:useConstantine -r tests/test_chunker.nim"
  exec "nim c -d:useConstantine -r tests/test_dataset.nim"

task test_blake3, "Run the test suite with BLAKE3":
  exec "nim c -d:useBlake3 -r tests/test_block.nim"
  exec "nim c -d:useBlake3 -r tests/test_merkle.nim"
  exec "nim c -d:useBlake3 -r tests/test_chunker.nim"
  exec "nim c -d:useBlake3 -r tests/test_dataset.nim"

task test_clean, "Clean test binaries":
  exec "rm tests/test_block"
  exec "rm tests/test_merkle"
  exec "rm tests/test_chunker"
  exec "rm tests/test_dataset"

task benchmark, "Compile and run the dataset benchmark":
  exec "nim c --hints:off -d:release -r tests/bench_dataset.nim"

task benchmark_constantine, "Compile and run the dataset benchmark with Constantine":
  exec "nim c --hints:off -d:release -d:useConstantine -r tests/bench_dataset.nim"

task benchmark_blake3, "Compile and run the dataset benchmark with BLAKE3":
  exec "nim c --hints:off -d:release -d:useBlake3 -r tests/bench_dataset.nim"

task benchmark_merkle, "Compile and run the merkle benchmark":
  exec "nim c --hints:off -d:release -r tests/bench_merkle.nim"

task benchmark_merkle_constantine, "Compile and run the merkle benchmark with Constantine":
  exec "nim c --hints:off -d:release -d:useConstantine -r tests/bench_merkle.nim"

task benchmark_merkle_blake3, "Compile and run the merkle benchmark with BLAKE3":
  exec "nim c --hints:off -d:release -d:useBlake3 -r tests/bench_merkle.nim"

const
  nimcacheBase = ".nimcache"
  coverageFlags = "--passC:\"-fprofile-arcs -ftest-coverage\" --passL:\"-fprofile-arcs -ftest-coverage\""
  coverageDir = "coverage_report"

proc runCoverage(testFile: string, reportName: string) =
  let nimcacheDir = nimcacheBase & "/" & reportName
  exec "nim c " & coverageFlags & " --nimcache:" & nimcacheDir & " -r tests/" & testFile & ".nim"
  exec "lcov --capture --directory " & nimcacheDir & " --output-file " & reportName & ".info --quiet"
  exec "lcov --extract " & reportName & ".info '*@sblockstore@s*' --output-file " & reportName & "_filtered.info --quiet"
  exec "genhtml " & reportName & "_filtered.info --output-directory " & coverageDir & "/" & reportName & " --quiet"
  exec "rm -f " & reportName & ".info " & reportName & "_filtered.info"
  echo "Coverage report: " & coverageDir & "/" & reportName & "/index.html"

task coverage, "Run all tests with coverage and generate HTML report":
  mkDir(coverageDir)
  mkDir(nimcacheBase)
  exec "nim c " & coverageFlags & " --nimcache:" & nimcacheBase & "/test_block -r tests/test_block.nim"
  exec "nim c " & coverageFlags & " --nimcache:" & nimcacheBase & "/test_merkle -r tests/test_merkle.nim"
  exec "nim c " & coverageFlags & " --nimcache:" & nimcacheBase & "/test_chunker -r tests/test_chunker.nim"
  exec "lcov --capture --directory " & nimcacheBase & "/test_block --directory " & nimcacheBase & "/test_merkle --directory " & nimcacheBase & "/test_chunker --output-file all_coverage.info --quiet"
  exec "lcov --extract all_coverage.info '*@sblockstore@s*' --output-file blockstore_coverage.info --quiet"
  exec "genhtml blockstore_coverage.info --output-directory " & coverageDir & "/all --quiet"
  exec "rm -f all_coverage.info blockstore_coverage.info"
  echo "Coverage report: " & coverageDir & "/all/index.html"

task coverage_merkle, "Run merkle tests with coverage":
  mkDir(coverageDir)
  mkDir(nimcacheBase)
  runCoverage("test_merkle", "test_merkle")

task coverage_block, "Run block tests with coverage":
  mkDir(coverageDir)
  mkDir(nimcacheBase)
  runCoverage("test_block", "test_block")

task coverage_chunker, "Run chunker tests with coverage":
  mkDir(coverageDir)
  mkDir(nimcacheBase)
  runCoverage("test_chunker", "test_chunker")

task coverage_bench_merkle, "Run merkle benchmark with coverage":
  mkDir(coverageDir)
  mkDir(nimcacheBase)
  let nimcacheDir = nimcacheBase & "/bench_merkle"
  exec "nim c " & coverageFlags & " --nimcache:" & nimcacheDir & " -r tests/bench_merkle.nim --size=100MB"
  exec "lcov --capture --directory " & nimcacheDir & " --output-file bench_merkle.info --quiet"
  exec "lcov --extract bench_merkle.info '*@sblockstore@s*' --output-file bench_merkle_filtered.info --quiet"
  exec "genhtml bench_merkle_filtered.info --output-directory " & coverageDir & "/bench_merkle --quiet"
  exec "rm -f bench_merkle.info bench_merkle_filtered.info"
  echo "Coverage report: " & coverageDir & "/bench_merkle/index.html"

task coverage_clean, "Clean coverage data and reports":
  exec "rm -rf " & coverageDir
  exec "rm -rf " & nimcacheBase
  echo "Coverage data cleaned"
blockstore/blockmap.nim (new file, 497 lines)
|
||||
import std/[os, bitops, memfiles, posix]
|
||||
import results
|
||||
|
||||
import ./errors
|
||||
import ./sharding
|
||||
import ./cid
|
||||
|
||||
proc newBlockmap*(size: int): seq[byte] =
|
||||
let byteCount = (size + 7) div 8
|
||||
newSeq[byte](byteCount)
|
||||
|
||||
proc blockmapGet*(blockmap: seq[byte], index: int): bool =
|
||||
if index < 0:
|
||||
return false
|
||||
let
|
||||
byteIdx = index div 8
|
||||
bitIdx = index mod 8
|
||||
if byteIdx >= blockmap.len:
|
||||
return false
|
||||
(blockmap[byteIdx] and (1'u8 shl bitIdx)) != 0
|
||||
|
||||
proc blockmapSet*(blockmap: var seq[byte], index: int, value: bool) =
|
||||
if index < 0:
|
||||
return
|
||||
let
|
||||
byteIdx = index div 8
|
||||
bitIdx = index mod 8
|
||||
if byteIdx >= blockmap.len:
|
||||
return
|
||||
if value:
|
||||
blockmap[byteIdx] = blockmap[byteIdx] or (1'u8 shl bitIdx)
|
||||
else:
|
||||
blockmap[byteIdx] = blockmap[byteIdx] and not (1'u8 shl bitIdx)
|
||||
|
||||
proc blockmapCountOnes*(blockmap: seq[byte]): int =
|
||||
result = 0
|
||||
for b in blockmap:
|
||||
result += countSetBits(b)
|
||||
|
||||
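
# Example (illustrative addition, not part of the original file): the helpers
# above implement a plain in-memory bitmap over a seq[byte], one bit per
# block index.
when isMainModule:
  var bmap = newBlockmap(20)          # 20 blocks -> 3 bytes
  blockmapSet(bmap, 3, true)
  blockmapSet(bmap, 17, true)
  assert blockmapGet(bmap, 3)
  assert not blockmapGet(bmap, 4)
  assert blockmapCountOnes(bmap) == 2
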
const
|
||||
BlockmapMagic = 0x424D4150'u32
|
||||
BlockmapVersion = 1'u8
|
||||
BlocksPerChunk* = 1024 * 1024
|
||||
GrowthChunk = 1024 * 1024
|
||||
HeaderSize = 24
|
||||
ChunkEmpty* = 0x00'u8
|
||||
ChunkFull* = 0xFF'u8
|
||||
ChunkPartial* = 0x01'u8
|
||||
|
||||
type
|
||||
BlockmapBackend* = enum
|
||||
bmLevelDb
|
||||
bmFile
|
||||
|
||||
FileBlockmap* = ref object
|
||||
path: string
|
||||
file: MemFile
|
||||
fileSize: int
|
||||
maxIndex: uint64
|
||||
indexSize: uint32
|
||||
readOnly: bool
|
||||
|
||||
BlockRange* = object
|
||||
start*: uint64
|
||||
count*: uint64
|
||||
|
||||
proc headerMaxIndex(mem: pointer): ptr uint64 {.inline.} =
|
||||
cast[ptr uint64](cast[uint](mem) + 8)
|
||||
|
||||
proc headerIndexSize(mem: pointer): ptr uint32 {.inline.} =
|
||||
cast[ptr uint32](cast[uint](mem) + 16)
|
||||
|
||||
proc indexOffset(): int {.inline.} =
|
||||
HeaderSize
|
||||
|
||||
proc bitmapOffset(indexSize: uint32): int {.inline.} =
|
||||
HeaderSize + indexSize.int
|
||||
|
||||
proc chunkIndexPtr(bm: FileBlockmap, chunkIdx: uint32): ptr uint8 {.inline.} =
|
||||
if chunkIdx >= bm.indexSize:
|
||||
return nil
|
||||
cast[ptr uint8](cast[uint](bm.file.mem) + indexOffset().uint + chunkIdx.uint)
|
||||
|
||||
proc bitmapBytePtr(bm: FileBlockmap, byteIdx: uint64): ptr uint8 {.inline.} =
|
||||
let offset = bitmapOffset(bm.indexSize).uint64 + byteIdx
|
||||
if offset.int >= bm.fileSize:
|
||||
return nil
|
||||
cast[ptr uint8](cast[uint](bm.file.mem) + offset.uint)
|
||||
|
||||
proc getChunkState*(bm: FileBlockmap, chunkIdx: uint32): uint8 =
|
||||
let p = bm.chunkIndexPtr(chunkIdx)
|
||||
if p == nil:
|
||||
return ChunkEmpty
|
||||
p[]
|
||||
|
||||
proc setChunkState(bm: FileBlockmap, chunkIdx: uint32, state: uint8) =
|
||||
let p = bm.chunkIndexPtr(chunkIdx)
|
||||
if p != nil:
|
||||
p[] = state
|
||||
|
||||
proc neededFileSize(blockIndex: uint64, currentIndexSize: uint32): tuple[fileSize: int, indexSize: uint32] =
|
||||
let chunkIdx = (blockIndex div BlocksPerChunk).uint32 + 1
|
||||
let newIndexSize = max(currentIndexSize, chunkIdx)
|
||||
let byteIdx = blockIndex div 8
|
||||
let bitmapEnd = bitmapOffset(newIndexSize) + byteIdx.int + 1
|
||||
let fileSize = ((bitmapEnd + GrowthChunk - 1) div GrowthChunk) * GrowthChunk
|
||||
(fileSize, newIndexSize)
|
||||
|
||||
proc growFile(bm: FileBlockmap, newSize: int, newIndexSize: uint32): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return err(ioError("Cannot grow read-only blockmap"))
|
||||
|
||||
var oldBitmapData: seq[byte] = @[]
|
||||
let oldIndexSize = bm.indexSize
|
||||
let oldBitmapOffset = bitmapOffset(oldIndexSize)
|
||||
let newBitmapOffset = bitmapOffset(newIndexSize)
|
||||
|
||||
if newIndexSize > oldIndexSize and oldIndexSize > 0:
|
||||
let bitmapSize = bm.fileSize - oldBitmapOffset
|
||||
if bitmapSize > 0:
|
||||
oldBitmapData = newSeq[byte](bitmapSize)
|
||||
copyMem(addr oldBitmapData[0], cast[pointer](cast[uint](bm.file.mem) + oldBitmapOffset.uint), bitmapSize)
|
||||
|
||||
bm.file.close()
|
||||
|
||||
try:
|
||||
let fd = posix.open(bm.path.cstring, O_RDWR)
|
||||
if fd < 0:
|
||||
return err(ioError("Failed to open file for truncate"))
|
||||
if ftruncate(fd, newSize.Off) != 0:
|
||||
discard posix.close(fd)
|
||||
return err(ioError("Failed to truncate file"))
|
||||
discard posix.close(fd)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to grow file: " & e.msg))
|
||||
|
||||
try:
|
||||
bm.file = memfiles.open(bm.path, fmReadWrite, mappedSize = newSize)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to remap file: " & e.msg))
|
||||
|
||||
bm.fileSize = newSize
|
||||
headerIndexSize(bm.file.mem)[] = newIndexSize
|
||||
|
||||
for i in oldIndexSize ..< newIndexSize:
|
||||
let p = cast[ptr uint8](cast[uint](bm.file.mem) + indexOffset().uint + i.uint)
|
||||
p[] = ChunkEmpty
|
||||
|
||||
if oldBitmapData.len > 0:
|
||||
copyMem(cast[pointer](cast[uint](bm.file.mem) + newBitmapOffset.uint), addr oldBitmapData[0], oldBitmapData.len)
|
||||
if newBitmapOffset > oldBitmapOffset:
|
||||
let gapSize = min(newBitmapOffset - oldBitmapOffset, oldBitmapData.len)
|
||||
zeroMem(cast[pointer](cast[uint](bm.file.mem) + oldBitmapOffset.uint), gapSize)
|
||||
|
||||
bm.indexSize = newIndexSize
|
||||
ok()
|
||||
|
||||
proc ensureCapacity(bm: FileBlockmap, blockIndex: uint64): BResult[void] =
|
||||
let (neededSize, neededIndexSize) = neededFileSize(blockIndex, bm.indexSize)
|
||||
if neededSize <= bm.fileSize and neededIndexSize <= bm.indexSize:
|
||||
return ok()
|
||||
?bm.growFile(max(neededSize, bm.fileSize), max(neededIndexSize, bm.indexSize))
|
||||
ok()
|
||||
|
||||
proc get*(bm: FileBlockmap, index: uint64): bool {.inline.} =
|
||||
if index >= bm.maxIndex:
|
||||
return false
|
||||
|
||||
let chunkIdx = (index div BlocksPerChunk).uint32
|
||||
let chunkState = bm.getChunkState(chunkIdx)
|
||||
|
||||
if chunkState == ChunkEmpty:
|
||||
return false
|
||||
if chunkState == ChunkFull:
|
||||
return true
|
||||
|
||||
let byteIdx = index div 8
|
||||
let bitIdx = index mod 8
|
||||
let p = bm.bitmapBytePtr(byteIdx)
|
||||
if p == nil:
|
||||
return false
|
||||
(p[] and (1'u8 shl bitIdx)) != 0
|
||||
|
||||
proc set*(bm: FileBlockmap, index: uint64): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return err(ioError("Cannot write to read-only blockmap"))
|
||||
|
||||
?bm.ensureCapacity(index)
|
||||
|
||||
let chunkIdx = (index div BlocksPerChunk).uint32
|
||||
let chunkState = bm.getChunkState(chunkIdx)
|
||||
|
||||
if chunkState == ChunkFull:
|
||||
return ok()
|
||||
|
||||
let byteIdx = index div 8
|
||||
let bitIdx = index mod 8
|
||||
let p = bm.bitmapBytePtr(byteIdx)
|
||||
if p != nil:
|
||||
p[] = p[] or (1'u8 shl bitIdx)
|
||||
|
||||
if chunkState == ChunkEmpty:
|
||||
bm.setChunkState(chunkIdx, ChunkPartial)
|
||||
|
||||
if index + 1 > bm.maxIndex:
|
||||
bm.maxIndex = index + 1
|
||||
headerMaxIndex(bm.file.mem)[] = bm.maxIndex
|
||||
|
||||
ok()
|
||||
|
||||
proc clear*(bm: FileBlockmap, index: uint64): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return err(ioError("Cannot write to read-only blockmap"))
|
||||
|
||||
if index >= bm.maxIndex:
|
||||
return ok()
|
||||
|
||||
let chunkIdx = (index div BlocksPerChunk).uint32
|
||||
let chunkState = bm.getChunkState(chunkIdx)
|
||||
|
||||
if chunkState == ChunkEmpty:
|
||||
return ok()
|
||||
|
||||
let byteIdx = index div 8
|
||||
let bitIdx = index mod 8
|
||||
let p = bm.bitmapBytePtr(byteIdx)
|
||||
if p != nil:
|
||||
p[] = p[] and not (1'u8 shl bitIdx)
|
||||
|
||||
if chunkState == ChunkFull:
|
||||
bm.setChunkState(chunkIdx, ChunkPartial)
|
||||
|
||||
ok()
|
||||
|
||||
proc countChunkBits(bm: FileBlockmap, chunkIdx: uint32): int =
|
||||
let startBlock = chunkIdx.uint64 * BlocksPerChunk
|
||||
let endBlock = min(startBlock + BlocksPerChunk, bm.maxIndex)
|
||||
if startBlock >= endBlock:
|
||||
return 0
|
||||
|
||||
let startByte = startBlock div 8
|
||||
let endByte = (endBlock + 7) div 8
|
||||
|
||||
result = 0
|
||||
for i in startByte ..< endByte:
|
||||
let p = bm.bitmapBytePtr(i)
|
||||
if p != nil:
|
||||
result += countSetBits(p[])
|
||||
|
||||
proc compactIndex*(bm: FileBlockmap) =
|
||||
if bm.readOnly:
|
||||
return
|
||||
|
||||
for i in 0'u32 ..< bm.indexSize:
|
||||
let state = bm.getChunkState(i)
|
||||
if state == ChunkPartial:
|
||||
let bits = bm.countChunkBits(i)
|
||||
let startBlock = i.uint64 * BlocksPerChunk
|
||||
let blocksInChunk = min(BlocksPerChunk.uint64, bm.maxIndex - startBlock).int
|
||||
|
||||
if bits == 0:
|
||||
bm.setChunkState(i, ChunkEmpty)
|
||||
elif bits == blocksInChunk:
|
||||
bm.setChunkState(i, ChunkFull)
|
||||
|
||||
proc countOnes*(bm: FileBlockmap): uint64 =
|
||||
result = 0
|
||||
for i in 0'u32 ..< bm.indexSize:
|
||||
let state = bm.getChunkState(i)
|
||||
case state
|
||||
of ChunkEmpty:
|
||||
discard
|
||||
of ChunkFull:
|
||||
let startBlock = i.uint64 * BlocksPerChunk
|
||||
result += min(BlocksPerChunk.uint64, bm.maxIndex - startBlock)
|
||||
else:
|
||||
result += bm.countChunkBits(i).uint64
|
||||
|
||||
proc isComplete*(bm: FileBlockmap, totalBlocks: uint64): bool =
|
||||
if bm.maxIndex < totalBlocks:
|
||||
return false
|
||||
let neededChunks = ((totalBlocks + BlocksPerChunk - 1) div BlocksPerChunk).uint32
|
||||
for i in 0'u32 ..< neededChunks:
|
||||
if bm.getChunkState(i) != ChunkFull:
|
||||
return false
|
||||
true
|
||||
|
||||
proc isEmpty*(bm: FileBlockmap): bool =
|
||||
for i in 0'u32 ..< bm.indexSize:
|
||||
if bm.getChunkState(i) != ChunkEmpty:
|
||||
return false
|
||||
true
|
||||
|
||||
proc maxBlockIndex*(bm: FileBlockmap): uint64 =
|
||||
bm.maxIndex
|
||||
|
||||
proc toRanges*(bm: FileBlockmap): seq[BlockRange] =
|
||||
result = @[]
|
||||
if bm.indexSize == 0:
|
||||
return
|
||||
|
||||
var currentStart: uint64 = 0
|
||||
var inRange = false
|
||||
|
||||
for i in 0'u32 ..< bm.indexSize:
|
||||
let state = bm.getChunkState(i)
|
||||
let chunkStart = i.uint64 * BlocksPerChunk
|
||||
let chunkEnd = min(chunkStart + BlocksPerChunk, bm.maxIndex)
|
||||
|
||||
case state
|
||||
of ChunkFull:
|
||||
if not inRange:
|
||||
currentStart = chunkStart
|
||||
inRange = true
|
||||
|
||||
if i == bm.indexSize - 1 or bm.getChunkState(i + 1) != ChunkFull:
|
||||
result.add(BlockRange(start: currentStart, count: chunkEnd - currentStart))
|
||||
inRange = false
|
||||
|
||||
of ChunkEmpty:
|
||||
if inRange:
|
||||
result.add(BlockRange(start: currentStart, count: chunkStart - currentStart))
|
||||
inRange = false
|
||||
|
||||
of ChunkPartial:
|
||||
if inRange:
|
||||
result.add(BlockRange(start: currentStart, count: chunkStart - currentStart))
|
||||
inRange = false
|
||||
|
||||
var j = chunkStart
|
||||
while j < chunkEnd:
|
||||
if bm.get(j):
|
||||
let rangeStart = j
|
||||
while j < chunkEnd and bm.get(j):
|
||||
inc j
|
||||
result.add(BlockRange(start: rangeStart, count: j - rangeStart))
|
||||
else:
|
||||
inc j
|
||||
|
||||
else:
|
||||
discard
|
||||
|
||||
proc flush*(bm: FileBlockmap) =
|
||||
if not bm.readOnly:
|
||||
bm.file.flush()
|
||||
|
||||
proc close*(bm: FileBlockmap) =
|
||||
if bm.file.mem != nil:
|
||||
bm.flush()
|
||||
bm.file.close()
|
||||
|
||||
proc setAll*(bm: FileBlockmap, totalBlocks: uint64): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return err(ioError("Cannot write to read-only blockmap"))
|
||||
|
||||
if totalBlocks == 0:
|
||||
return ok()
|
||||
|
||||
?bm.ensureCapacity(totalBlocks - 1)
|
||||
|
||||
let fullBytes = totalBlocks div 8
|
||||
let remainderBits = totalBlocks mod 8
|
||||
|
||||
for i in 0'u64 ..< fullBytes:
|
||||
let p = bm.bitmapBytePtr(i)
|
||||
if p != nil:
|
||||
p[] = 0xFF'u8
|
||||
|
||||
if remainderBits > 0:
|
||||
let p = bm.bitmapBytePtr(fullBytes)
|
||||
if p != nil:
|
||||
p[] = (1'u8 shl remainderBits) - 1
|
||||
|
||||
bm.maxIndex = totalBlocks
|
||||
headerMaxIndex(bm.file.mem)[] = totalBlocks
|
||||
|
||||
let chunkCount = ((totalBlocks + BlocksPerChunk - 1) div BlocksPerChunk).uint32
|
||||
for i in 0'u32 ..< chunkCount:
|
||||
bm.setChunkState(i, ChunkFull)
|
||||
|
||||
ok()
|
||||
|
||||
proc finalize*(bm: FileBlockmap, totalBlocks: uint64): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return ok()
|
||||
|
||||
if totalBlocks > bm.maxIndex:
|
||||
bm.maxIndex = totalBlocks
|
||||
headerMaxIndex(bm.file.mem)[] = totalBlocks
|
||||
|
||||
bm.compactIndex()
|
||||
bm.flush()
|
||||
ok()
|
||||
|
||||
proc newFileBlockmap*(path: string, forWriting: bool = true): BResult[FileBlockmap] =
|
||||
let parentDir = parentDir(path)
|
||||
if not dirExists(parentDir):
|
||||
try:
|
||||
createDir(parentDir)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create directory: " & e.msg))
|
||||
|
||||
var isNew = not fileExists(path)
|
||||
|
||||
if isNew and not forWriting:
|
||||
return err(ioError("Blockmap file does not exist: " & path))
|
||||
|
||||
var initialSize = HeaderSize + GrowthChunk
|
||||
|
||||
if isNew:
|
||||
try:
|
||||
let fd = posix.open(path.cstring, O_RDWR or O_CREAT, 0o644)
|
||||
if fd < 0:
|
||||
return err(ioError("Failed to create blockmap file"))
|
||||
if ftruncate(fd, initialSize.Off) != 0:
|
||||
discard posix.close(fd)
|
||||
return err(ioError("Failed to set initial file size"))
|
||||
discard posix.close(fd)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create blockmap file: " & e.msg))
|
||||
else:
|
||||
try:
|
||||
initialSize = getFileSize(path).int
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to get file size: " & e.msg))
|
||||
|
||||
let mode = if forWriting: fmReadWrite else: fmRead
|
||||
var mf: MemFile
|
||||
try:
|
||||
mf = memfiles.open(path, mode, mappedSize = initialSize)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to mmap blockmap: " & e.msg))
|
||||
|
||||
var bm = FileBlockmap(
|
||||
path: path,
|
||||
file: mf,
|
||||
fileSize: initialSize,
|
||||
maxIndex: 0,
|
||||
indexSize: 0,
|
||||
readOnly: not forWriting
|
||||
)
|
||||
|
||||
if isNew:
|
||||
let header = cast[ptr uint32](mf.mem)
|
||||
header[] = BlockmapMagic
|
||||
cast[ptr uint8](cast[uint](mf.mem) + 4)[] = BlockmapVersion
|
||||
headerMaxIndex(mf.mem)[] = 0
|
||||
headerIndexSize(mf.mem)[] = 0
|
||||
else:
|
||||
let magic = cast[ptr uint32](mf.mem)[]
|
||||
if magic != BlockmapMagic:
|
||||
mf.close()
|
||||
return err(ioError("Invalid blockmap magic"))
|
||||
let version = cast[ptr uint8](cast[uint](mf.mem) + 4)[]
|
||||
if version != BlockmapVersion:
|
||||
mf.close()
|
||||
return err(ioError("Unsupported blockmap version"))
|
||||
bm.maxIndex = headerMaxIndex(mf.mem)[]
|
||||
bm.indexSize = headerIndexSize(mf.mem)[]
|
||||
|
||||
ok(bm)
|
||||
|
||||
proc getBlockmapPath*(blockmapsDir: string, treeCid: Cid): string =
|
||||
getShardedPath(blockmapsDir, treeCid, ".blkmap")
|
||||
|
||||
proc getBlockmapPathStr*(blockmapsDir: string, treeCidStr: string): string =
|
||||
getShardedPathStr(blockmapsDir, treeCidStr, ".blkmap")
|
||||
|
||||
proc toSeqByte*(bm: FileBlockmap): seq[byte] =
|
||||
let bitmapSize = (bm.maxIndex + 7) div 8
|
||||
result = newSeq[byte](bitmapSize.int)
|
||||
for i in 0'u64 ..< bitmapSize:
|
||||
let p = bm.bitmapBytePtr(i)
|
||||
if p != nil:
|
||||
result[i.int] = p[]
|
||||
|
||||
proc fromSeqByte*(bm: FileBlockmap, data: seq[byte]): BResult[void] =
|
||||
if bm.readOnly:
|
||||
return err(ioError("Cannot write to read-only blockmap"))
|
||||
|
||||
let maxIndex = data.len.uint64 * 8
|
||||
?bm.ensureCapacity(maxIndex - 1)
|
||||
|
||||
for i in 0'u64 ..< data.len.uint64:
|
||||
let p = bm.bitmapBytePtr(i)
|
||||
if p != nil:
|
||||
p[] = data[i.int]
|
||||
|
||||
bm.maxIndex = maxIndex
|
||||
headerMaxIndex(bm.file.mem)[] = maxIndex
|
||||
|
||||
let chunkCount = ((maxIndex + BlocksPerChunk - 1) div BlocksPerChunk).uint32
|
||||
for i in 0'u32 ..< chunkCount:
|
||||
bm.setChunkState(i, ChunkPartial)
|
||||
bm.compactIndex()
|
||||
|
||||
ok()
|
||||
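
# Example usage of the file-backed blockmap (illustrative addition, not part of
# the original file; the path and block count are hypothetical):
when isMainModule:
  let bm = newFileBlockmap("/tmp/example.blkmap").get()
  for i in 0'u64 ..< 10'u64:
    discard bm.set(i)
  assert bm.get(3'u64)
  echo "blocks present: ", bm.countOnes()
  echo "ranges: ", bm.toRanges().len
  discard bm.finalize(10'u64)
  bm.close()
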
blockstore/blocks.nim (new file, 92 lines)
|
||||
import std/hashes
|
||||
import results
|
||||
import libp2p/multicodec
|
||||
|
||||
import ./errors
|
||||
import ./cid
|
||||
import ./sha256
|
||||
|
||||
type
|
||||
HashDigest* = array[32, byte]
|
||||
HashFunc* = proc(data: openArray[byte]): HashDigest {.noSideEffect, gcsafe, raises: [].}
|
||||
|
||||
BlockHashConfig* = object
|
||||
hashFunc*: HashFunc
|
||||
hashCode*: MultiCodec
|
||||
blockCodec*: MultiCodec
|
||||
treeCodec*: MultiCodec
|
||||
|
||||
Block* = ref object
|
||||
cid*: Cid
|
||||
data*: seq[byte]
|
||||
|
||||
BlockMetadata* = object
|
||||
cid*: string
|
||||
size*: int
|
||||
index*: int
|
||||
|
||||
proc sha256HashFunc*(data: openArray[byte]): HashDigest {.noSideEffect, gcsafe, raises: [].} =
|
||||
sha256Hash(data)
|
||||
|
||||
proc defaultBlockHashConfig*(): BlockHashConfig {.gcsafe.} =
|
||||
BlockHashConfig(
|
||||
hashFunc: sha256HashFunc,
|
||||
hashCode: Sha256Code,
|
||||
blockCodec: LogosStorageBlock,
|
||||
treeCodec: LogosStorageTree
|
||||
)
|
||||
|
||||
proc computeCid*(data: openArray[byte], config: BlockHashConfig): BResult[Cid] =
|
||||
let
|
||||
hash = config.hashFunc(data)
|
||||
mh = ?wrap(config.hashCode, hash)
|
||||
newCidV1(config.blockCodec, mh)
|
||||
|
||||
proc computeCid*(data: openArray[byte]): BResult[Cid] =
|
||||
computeCid(data, defaultBlockHashConfig())
|
||||
|
||||
proc newBlock*(data: seq[byte], config: BlockHashConfig): BResult[Block] =
|
||||
let c = ?computeCid(data, config)
|
||||
var blk = new(Block)
|
||||
blk.cid = c
|
||||
blk.data = data
|
||||
ok(blk)
|
||||
|
||||
proc newBlock*(data: seq[byte]): BResult[Block] =
|
||||
newBlock(data, defaultBlockHashConfig())
|
||||
|
||||
proc newBlock*(data: string, config: BlockHashConfig): BResult[Block] =
|
||||
newBlock(cast[seq[byte]](data), config)
|
||||
|
||||
proc newBlock*(data: string): BResult[Block] =
|
||||
newBlock(cast[seq[byte]](data), defaultBlockHashConfig())
|
||||
|
||||
proc fromCidUnchecked*(cid: Cid, data: seq[byte]): Block =
|
||||
var blk = new(Block)
|
||||
blk.cid = cid
|
||||
blk.data = data
|
||||
blk
|
||||
|
||||
proc verify*(b: Block): BResult[bool] =
|
||||
let computed = ?computeCid(b.data)
|
||||
ok(computed == b.cid)
|
||||
|
||||
proc size*(b: Block): int {.inline.} =
|
||||
b.data.len
|
||||
|
||||
proc `==`*(a, b: Block): bool =
|
||||
a.cid == b.cid and a.data == b.data
|
||||
|
||||
proc hash*(b: Block): Hash =
|
||||
var h: Hash = 0
|
||||
h = h !& hash(b.cid.toBytes())
|
||||
!$h
|
||||
|
||||
proc newBlockMetadata*(cid: Cid, size: int, index: int): BlockMetadata =
|
||||
BlockMetadata(cid: $cid, size: size, index: index)
|
||||
|
||||
proc `$`*(b: Block): string =
|
||||
"Block(" & $b.cid & ", size=" & $b.size & ")"
|
||||
|
||||
proc `$`*(m: BlockMetadata): string =
|
||||
"BlockMetadata(cid=" & m.cid & ", size=" & $m.size & ", index=" & $m.index & ")"
|
||||
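
# Example (illustrative addition, not part of the original file): build a block
# from raw bytes with the default SHA-256 configuration and check it round-trips.
when isMainModule:
  let blk = newBlock(@[byte 1, 2, 3]).get()
  echo blk                                   # Block(<cid>, size=3)
  assert blk.verify().get()
  assert blk == fromCidUnchecked(blk.cid, blk.data)
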
blockstore/blockstore.nim (new file, 23 lines)

import ./errors
import ./cid
import ./blocks
import ./serialization
import ./merkle
import ./chunker
import ./manifest
import ./repostore
import ./dataset

export errors
export cid
export blocks
export serialization
export merkle
export chunker
export manifest
export repostore
export dataset

const
  BlockstoreVersion* = "0.1.0"
  BlockstoreDescription* = "Nim blockstore"
blockstore/chunker.nim (new file, 261 lines)
|
||||
import std/[os, options]
|
||||
import chronos
|
||||
import chronos/threadsync
|
||||
import taskpools
|
||||
import results
|
||||
|
||||
import ./errors
|
||||
import ./blocks as blk
|
||||
|
||||
when defined(posix):
|
||||
import std/posix
|
||||
|
||||
when defined(windows):
|
||||
import std/winlean
|
||||
|
||||
const
|
||||
DefaultChunkSize* = 64 * 1024
|
||||
MinPoolSize* = 2 #TODO cnanakos: figure what happens when 1
|
||||
|
||||
type
|
||||
ChunkerConfig* = object
|
||||
chunkSize*: int
|
||||
|
||||
ReadResult = object
|
||||
bytesRead: int
|
||||
hasError: bool
|
||||
error: string
|
||||
|
||||
AsyncChunker* = ref object
|
||||
config: ChunkerConfig
|
||||
pool: Taskpool
|
||||
ownsPool: bool
|
||||
|
||||
AsyncChunkStream* = ref object
|
||||
filePath: string
|
||||
fd: cint
|
||||
chunkSize: int
|
||||
offset: int64
|
||||
index: int
|
||||
finished: bool
|
||||
pool: Taskpool
|
||||
buffer: seq[byte]
|
||||
|
||||
SyncChunker* = ref object
|
||||
config: ChunkerConfig
|
||||
|
||||
SyncChunkIterator* = ref object
|
||||
file: File
|
||||
chunkSize: int
|
||||
buffer: seq[byte]
|
||||
index: int
|
||||
finished: bool
|
||||
|
||||
|
||||
proc newChunkerConfig*(chunkSize: int = DefaultChunkSize): ChunkerConfig =
|
||||
ChunkerConfig(chunkSize: chunkSize)
|
||||
|
||||
proc defaultChunkerConfig*(): ChunkerConfig =
|
||||
ChunkerConfig(chunkSize: DefaultChunkSize)
|
||||
|
||||
proc newAsyncChunker*(pool: Taskpool): AsyncChunker =
|
||||
AsyncChunker(
|
||||
config: defaultChunkerConfig(),
|
||||
pool: pool,
|
||||
ownsPool: false
|
||||
)
|
||||
|
||||
proc newAsyncChunker*(pool: Taskpool, config: ChunkerConfig): AsyncChunker =
|
||||
AsyncChunker(
|
||||
config: config,
|
||||
pool: pool,
|
||||
ownsPool: false
|
||||
)
|
||||
|
||||
proc chunkSize*(chunker: AsyncChunker): int {.inline.} =
|
||||
chunker.config.chunkSize
|
||||
|
||||
proc shutdown*(chunker: AsyncChunker) =
|
||||
if chunker.ownsPool:
|
||||
chunker.pool.shutdown()
|
||||
|
||||
proc readChunkWorker(fd: cint, offset: int64, size: int,
|
||||
buffer: ptr byte,
|
||||
signal: ThreadSignalPtr,
|
||||
resultPtr: ptr ReadResult) {.gcsafe.} =
|
||||
when defined(posix):
|
||||
let bytesRead = pread(fd, buffer, size, offset.Off)
|
||||
if bytesRead < 0:
|
||||
resultPtr[].hasError = true
|
||||
resultPtr[].error = "Read error: " & $strerror(errno)
|
||||
else:
|
||||
resultPtr[].bytesRead = bytesRead.int
|
||||
resultPtr[].hasError = false
|
||||
elif defined(windows):
|
||||
var
|
||||
overlapped: OVERLAPPED
|
||||
bytesRead: DWORD
|
||||
overlapped.Offset = cast[DWORD](offset and 0xFFFFFFFF'i64)
|
||||
overlapped.OffsetHigh = cast[DWORD](offset shr 32)
|
||||
let success = readFile(fd.Handle, buffer, size.DWORD, addr bytesRead, addr overlapped)
|
||||
if success == 0:
|
||||
resultPtr[].hasError = true
|
||||
resultPtr[].error = "Read error"
|
||||
else:
|
||||
resultPtr[].bytesRead = bytesRead.int
|
||||
resultPtr[].hasError = false
|
||||
else:
|
||||
{.error: "Unsupported platform".}
|
||||
|
||||
discard signal.fireSync()
|
||||
|
||||
proc chunkFile*(chunker: AsyncChunker, filePath: string): Future[BResult[AsyncChunkStream]] {.async.} =
|
||||
if not fileExists(filePath):
|
||||
return err(ioError("File not found: " & filePath))
|
||||
|
||||
when defined(posix):
|
||||
let fd = open(filePath.cstring, O_RDONLY)
|
||||
if fd < 0:
|
||||
return err(ioError("Cannot open file: " & filePath))
|
||||
elif defined(windows):
|
||||
let fd = createFileA(filePath.cstring, GENERIC_READ, FILE_SHARE_READ,
|
||||
nil, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0)
|
||||
if fd == INVALID_HANDLE_VALUE:
|
||||
return err(ioError("Cannot open file: " & filePath))
|
||||
else:
|
||||
{.error: "Unsupported platform".}
|
||||
|
||||
let stream = AsyncChunkStream(
|
||||
filePath: filePath,
|
||||
fd: fd.cint,
|
||||
chunkSize: chunker.config.chunkSize,
|
||||
offset: 0,
|
||||
index: 0,
|
||||
finished: false,
|
||||
pool: chunker.pool,
|
||||
buffer: newSeq[byte](chunker.config.chunkSize)
|
||||
)
|
||||
return ok(stream)
|
||||
|
||||
proc currentIndex*(stream: AsyncChunkStream): int {.inline.} =
|
||||
stream.index
|
||||
|
||||
proc isFinished*(stream: AsyncChunkStream): bool {.inline.} =
|
||||
stream.finished
|
||||
|
||||
proc nextBlock*(stream: AsyncChunkStream): Future[Option[BResult[blk.Block]]] {.async.} =
|
||||
if stream.finished:
|
||||
return none(BResult[blk.Block])
|
||||
|
||||
let signalResult = ThreadSignalPtr.new()
|
||||
if signalResult.isErr:
|
||||
stream.finished = true
|
||||
return some(BResult[blk.Block].err(ioError("Failed to create signal")))
|
||||
|
||||
let signal = signalResult.get()
|
||||
var readResult: ReadResult
|
||||
|
||||
stream.pool.spawn readChunkWorker(stream.fd, stream.offset, stream.chunkSize,
|
||||
addr stream.buffer[0], signal, addr readResult)
|
||||
|
||||
try:
|
||||
await signal.wait()
|
||||
except AsyncError as e:
|
||||
discard signal.close()
|
||||
stream.finished = true
|
||||
return some(BResult[blk.Block].err(ioError("Signal wait failed: " & e.msg)))
|
||||
except CancelledError:
|
||||
discard signal.close()
|
||||
stream.finished = true
|
||||
return some(BResult[blk.Block].err(ioError("Operation cancelled")))
|
||||
|
||||
discard signal.close()
|
||||
|
||||
if readResult.hasError:
|
||||
stream.finished = true
|
||||
return some(BResult[blk.Block].err(ioError(readResult.error)))
|
||||
|
||||
if readResult.bytesRead == 0:
|
||||
stream.finished = true
|
||||
return none(BResult[blk.Block])
|
||||
|
||||
let
|
||||
data = stream.buffer[0 ..< readResult.bytesRead]
|
||||
blockResult = blk.newBlock(data)
|
||||
|
||||
stream.offset += readResult.bytesRead
|
||||
stream.index += 1
|
||||
|
||||
return some(blockResult)
|
||||
|
||||
proc close*(stream: AsyncChunkStream) =
|
||||
if not stream.finished:
|
||||
when defined(posix):
|
||||
discard posix.close(stream.fd)
|
||||
elif defined(windows):
|
||||
discard closeHandle(stream.fd.Handle)
|
||||
stream.finished = true
|
||||
|
||||
proc newSyncChunker*(): SyncChunker =
|
||||
SyncChunker(config: defaultChunkerConfig())
|
||||
|
||||
proc newSyncChunker*(config: ChunkerConfig): SyncChunker =
|
||||
SyncChunker(config: config)
|
||||
|
||||
proc chunkFile*(chunker: SyncChunker, filePath: string): BResult[SyncChunkIterator] =
|
||||
if not fileExists(filePath):
|
||||
return err(ioError("File not found: " & filePath))
|
||||
|
||||
var file: File
|
||||
if not open(file, filePath, fmRead):
|
||||
return err(ioError("Cannot open file: " & filePath))
|
||||
|
||||
let iter = SyncChunkIterator(
|
||||
file: file,
|
||||
chunkSize: chunker.config.chunkSize,
|
||||
buffer: newSeq[byte](chunker.config.chunkSize),
|
||||
index: 0,
|
||||
finished: false
|
||||
)
|
||||
return ok(iter)
|
||||
|
||||
proc currentIndex*(iter: SyncChunkIterator): int {.inline.} =
|
||||
iter.index
|
||||
|
||||
proc isFinished*(iter: SyncChunkIterator): bool {.inline.} =
|
||||
iter.finished
|
||||
|
||||
proc nextBlock*(iter: SyncChunkIterator): Option[BResult[blk.Block]] =
|
||||
if iter.finished:
|
||||
return none(BResult[blk.Block])
|
||||
|
||||
try:
|
||||
let bytesRead = iter.file.readBytes(iter.buffer, 0, iter.chunkSize)
|
||||
|
||||
if bytesRead == 0:
|
||||
iter.finished = true
|
||||
return none(BResult[blk.Block])
|
||||
|
||||
let
|
||||
data = iter.buffer[0 ..< bytesRead]
|
||||
blockResult = blk.newBlock(data)
|
||||
iter.index += 1
|
||||
return some(blockResult)
|
||||
|
||||
except IOError as e:
|
||||
iter.finished = true
|
||||
return some(BResult[blk.Block].err(ioError(e.msg)))
|
||||
|
||||
proc close*(iter: SyncChunkIterator) =
|
||||
iter.file.close()
|
||||
iter.finished = true
|
||||
|
||||
proc chunkData*(data: openArray[byte], chunkSize: int = DefaultChunkSize): seq[BResult[blk.Block]] =
|
||||
result = @[]
|
||||
var offset = 0
|
||||
while offset < data.len:
|
||||
let
|
||||
endOffset = min(offset + chunkSize, data.len)
|
||||
chunk = data[offset ..< endOffset]
|
||||
result.add(blk.newBlock(@chunk))
|
||||
offset = endOffset
|
||||
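
# Example (illustrative addition, not part of the original file): chunk an
# in-memory buffer into fixed-size blocks with chunkData above.
when isMainModule:
  let payload = newSeq[byte](150 * 1024)     # 150 KiB of zeroes
  let chunks = chunkData(payload)            # default 64 KiB chunks
  assert chunks.len == 3                     # 64 KiB + 64 KiB + 22 KiB
  for c in chunks:
    echo "block of ", c.get().size, " bytes"
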
blockstore/cid.nim (new file, 132 lines)
|
||||
import std/hashes
|
||||
import results
|
||||
import libp2p/cid as libp2pCid
|
||||
import libp2p/[multicodec, multihash]
|
||||
import ./errors
|
||||
|
||||
type
|
||||
Cid* = libp2pCid.Cid
|
||||
CidVersion* = libp2pCid.CidVersion
|
||||
CidError* = libp2pCid.CidError
|
||||
|
||||
const
|
||||
CIDv0* = libp2pCid.CIDv0
|
||||
CIDv1* = libp2pCid.CIDv1
|
||||
|
||||
LogosStorageManifest* = multiCodec("logos-storage-manifest")
|
||||
LogosStorageBlock* = multiCodec("logos-storage-block")
|
||||
LogosStorageTree* = multiCodec("logos-storage-tree")
|
||||
Sha256Code* = multiCodec("sha2-256")
|
||||
Sha256DigestSize* = 32
|
||||
Base32Alphabet* = "abcdefghijklmnopqrstuvwxyz234567"
|
||||
|
||||
Base32DecodeTable*: array[256, int8] = block:
|
||||
var t: array[256, int8]
|
||||
for i in 0..255:
|
||||
t[i] = -1
|
||||
for i, c in Base32Alphabet:
|
||||
t[ord(c)] = int8(i)
|
||||
t[ord(c) - 32] = int8(i) # uppercase
|
||||
t
|
||||
|
||||
proc wrap*(code: MultiCodec, digest: openArray[byte]): BResult[MultiHash] =
|
||||
let mhResult = MultiHash.init(code, digest)
|
||||
if mhResult.isErr:
|
||||
return err(multihashError("Failed to create multihash"))
|
||||
ok(mhResult.get())
|
||||
|
||||
proc newCidV1*(codec: MultiCodec, mh: MultiHash): BResult[Cid] =
|
||||
let cidResult = Cid.init(libp2pCid.CIDv1, codec, mh)
|
||||
if cidResult.isErr:
|
||||
return err(cidError("Failed to create CID: " & $cidResult.error))
|
||||
ok(cidResult.get())
|
||||
|
||||
proc toBytes*(c: Cid): seq[byte] =
|
||||
c.data.buffer
|
||||
|
||||
proc mhash*(c: Cid): Result[MultiHash, CidError] =
|
||||
libp2pCid.mhash(c)
|
||||
|
||||
proc cidFromBytes*(data: openArray[byte]): BResult[Cid] =
|
||||
let cidResult = Cid.init(data)
|
||||
if cidResult.isErr:
|
||||
return err(cidError("Failed to parse CID: " & $cidResult.error))
|
||||
ok(cidResult.get())
|
||||
|
||||
proc base32Encode*(data: openArray[byte]): string =
|
||||
if data.len == 0:
|
||||
return ""
|
||||
|
||||
result = ""
|
||||
var
|
||||
buffer: uint64 = 0
|
||||
bits = 0
|
||||
|
||||
for b in data:
|
||||
buffer = (buffer shl 8) or b.uint64
|
||||
bits += 8
|
||||
while bits >= 5:
|
||||
bits -= 5
|
||||
let idx = (buffer shr bits) and 0x1F
|
||||
result.add(Base32Alphabet[idx.int])
|
||||
|
||||
if bits > 0:
|
||||
let idx = (buffer shl (5 - bits)) and 0x1F
|
||||
result.add(Base32Alphabet[idx.int])
|
||||
|
||||
proc base32Decode*(s: string): BResult[seq[byte]] =
|
||||
if s.len == 0:
|
||||
return ok(newSeq[byte]())
|
||||
|
||||
var
|
||||
buffer: uint64 = 0
|
||||
bits = 0
|
||||
res: seq[byte] = @[]
|
||||
|
||||
for c in s:
|
||||
let idx = Base32DecodeTable[ord(c)]
|
||||
if idx < 0:
|
||||
return err(cidError("Invalid base32 character: " & $c))
|
||||
|
||||
buffer = (buffer shl 5) or idx.uint64
|
||||
bits += 5
|
||||
|
||||
if bits >= 8:
|
||||
bits -= 8
|
||||
res.add(((buffer shr bits) and 0xFF).byte)
|
||||
|
||||
ok(res)
|
||||
|
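# Worked example (illustrative, not from the original source): with the unpadded
# lowercase alphabet above,
#   base32Encode(@[byte 0x01, 0x71]) == "afyq"
#   base32Decode("afyq") == ok(@[byte 0x01, 0x71])
# The `$` proc below prefixes this encoding with "b", the multibase tag that
# CIDv1 base32 strings conventionally carry, and cidFromString reverses it.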
||||
proc `$`*(c: Cid): string =
|
||||
"b" & base32Encode(c.data.buffer)
|
||||
|
||||
proc cidFromString*(s: string): BResult[Cid] =
|
||||
if s.len < 2:
|
||||
return err(cidError("CID string too short"))
|
||||
|
||||
if s[0] == 'b':
|
||||
let decoded = ?base32Decode(s[1 .. ^1])
|
||||
return cidFromBytes(decoded)
|
||||
else:
|
||||
let cidResult = Cid.init(s)
|
||||
if cidResult.isErr:
|
||||
return err(cidError("Failed to parse CID: " & $cidResult.error))
|
||||
ok(cidResult.get())
|
||||
|
||||
proc `<`*(a, b: Cid): bool =
|
||||
let
|
||||
aData = a.data.buffer
|
||||
bData = b.data.buffer
|
||||
minLen = min(aData.len, bData.len)
|
||||
for i in 0 ..< minLen:
|
||||
if aData[i] < bData[i]: return true
|
||||
elif aData[i] > bData[i]: return false
|
||||
aData.len < bData.len
|
||||
|
||||
proc cmp*(a, b: Cid): int =
|
||||
if a < b: -1
|
||||
elif b < a: 1
|
||||
else: 0
|
||||
|
||||
proc hash*(c: Cid): Hash {.inline.} =
|
||||
hash(c.data.buffer)
|
||||
8
blockstore/contentids_exts.nim
Normal file
@ -0,0 +1,8 @@
|
||||
## LogosStorage content ID extensions for libp2p CID
|
||||
import libp2p/multicodec
|
||||
|
||||
const ContentIdsExts* = @[
|
||||
multiCodec("logos-storage-manifest"),
|
||||
multiCodec("logos-storage-block"),
|
||||
multiCodec("logos-storage-tree"),
|
||||
]
|
||||
1275
blockstore/dataset.nim
Normal file
File diff suppressed because it is too large
179
blockstore/directio.nim
Normal file
@ -0,0 +1,179 @@
|
||||
import std/os
|
||||
import results
|
||||
import ./errors
|
||||
|
||||
when defined(posix):
|
||||
import std/posix
|
||||
proc c_free(p: pointer) {.importc: "free", header: "<stdlib.h>".}
|
||||
|
||||
when defined(linux):
|
||||
const O_DIRECT* = cint(0o40000)
|
||||
|
||||
when defined(macosx):
|
||||
const F_NOCACHE* = cint(48)
|
||||
|
||||
const
|
||||
PageSize* = 4096
|
||||
MinChunkSize* = PageSize
|
||||
|
||||
type
|
||||
AlignedBuffer* = object
|
||||
data*: ptr UncheckedArray[byte]
|
||||
size*: int
|
||||
capacity*: int
|
||||
|
||||
DirectFile* = ref object
|
||||
fd: cint
|
||||
path: string
|
||||
offset: int64
|
||||
|
||||
proc isPowerOfTwo*(x: uint32): bool {.inline.} =
|
||||
x > 0 and (x and (x - 1)) == 0
|
||||
|
||||
proc alignUp*(size: int, alignment: int = PageSize): int {.inline.} =
|
||||
(size + alignment - 1) and not (alignment - 1)
|
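# Example (illustrative): alignUp(4096) == 4096 and alignUp(5000) == 8192,
# i.e. sizes are rounded up to the next multiple of the 4096-byte page.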
||||
|
||||
proc newAlignedBuffer*(size: int): AlignedBuffer =
|
||||
let alignedSize = alignUp(size)
|
||||
when defined(posix):
|
||||
var p: pointer
|
||||
let rc = posix_memalign(addr p, PageSize.csize_t, alignedSize.csize_t)
|
||||
if rc != 0:
|
||||
raise newException(OutOfMemDefect, "Failed to allocate aligned memory")
|
||||
result.data = cast[ptr UncheckedArray[byte]](p)
|
||||
else:
|
||||
let
|
||||
raw = alloc0(alignedSize + PageSize)
|
||||
aligned = (cast[int](raw) + PageSize - 1) and not (PageSize - 1)
|
||||
result.data = cast[ptr UncheckedArray[byte]](aligned)
|
||||
|
||||
result.size = 0
|
||||
result.capacity = alignedSize
|
||||
zeroMem(result.data, alignedSize)
|
||||
|
||||
proc free*(buf: var AlignedBuffer) =
|
||||
if buf.data != nil:
|
||||
when defined(posix):
|
||||
c_free(buf.data)
|
||||
else:
|
||||
dealloc(buf.data)
|
||||
buf.data = nil
|
||||
buf.size = 0
|
||||
buf.capacity = 0
|
||||
|
||||
proc copyFrom*(buf: var AlignedBuffer, data: openArray[byte]) =
|
||||
if data.len > buf.capacity:
|
||||
raise newException(ValueError, "Data exceeds buffer capacity")
|
||||
|
||||
if data.len > 0:
|
||||
copyMem(buf.data, unsafeAddr data[0], data.len)
|
||||
|
||||
if data.len < buf.capacity:
|
||||
zeroMem(addr buf.data[data.len], buf.capacity - data.len)
|
||||
|
||||
buf.size = data.len
|
||||
|
||||
proc clear*(buf: var AlignedBuffer) =
|
||||
zeroMem(buf.data, buf.capacity)
|
||||
buf.size = 0
|
||||
|
||||
proc openForWrite*(path: string): BResult[DirectFile] =
|
||||
when defined(linux):
|
||||
let
|
||||
flags = O_WRONLY or O_CREAT or O_TRUNC or O_DIRECT
|
||||
fd = posix.open(path.cstring, flags, 0o644)
|
||||
if fd < 0:
|
||||
return err(ioError("Failed to open file for direct I/O: " & path & " (errno: " & $errno & ")"))
|
||||
ok(DirectFile(fd: fd, path: path, offset: 0))
|
||||
|
||||
elif defined(macosx):
|
||||
let
|
||||
flags = O_WRONLY or O_CREAT or O_TRUNC
|
||||
fd = posix.open(path.cstring, flags, 0o644)
|
||||
if fd < 0:
|
||||
return err(ioError("Failed to open file: " & path))
|
||||
if fcntl(fd, F_NOCACHE, 1) < 0:
|
||||
discard posix.close(fd)
|
||||
return err(ioError("Failed to set F_NOCACHE: " & path))
|
||||
ok(DirectFile(fd: fd, path: path, offset: 0))
|
||||
|
||||
elif defined(posix):
|
||||
err(ioError("Direct I/O not supported on this platform"))
|
||||
|
||||
else:
|
||||
err(ioError("Direct I/O not supported on this platform"))
|
||||
|
||||
proc writeAligned*(f: DirectFile, buf: AlignedBuffer): BResult[int] =
|
||||
when defined(posix):
|
||||
let
|
||||
toWrite = buf.capacity
|
||||
written = posix.write(f.fd, cast[pointer](buf.data), toWrite)
|
||||
if written < 0:
|
||||
return err(ioError("Direct write failed (errno: " & $errno & ")"))
|
||||
if written != toWrite:
|
||||
return err(ioError("Incomplete direct write: " & $written & "/" & $toWrite))
|
||||
f.offset += written
|
||||
ok(written.int)
|
||||
else:
|
||||
err(ioError("Direct I/O not supported"))
|
||||
|
||||
proc truncateFile*(f: DirectFile, size: int64): BResult[void] =
|
||||
when defined(posix):
|
||||
if ftruncate(f.fd, size.Off) < 0:
|
||||
return err(ioError("Failed to truncate file (errno: " & $errno & ")"))
|
||||
ok()
|
||||
else:
|
||||
err(ioError("Truncate not supported"))
|
||||
|
||||
proc currentOffset*(f: DirectFile): int64 {.inline.} =
|
||||
f.offset
|
||||
|
||||
proc close*(f: DirectFile) =
|
||||
if f != nil and f.fd >= 0:
|
||||
when defined(posix):
|
||||
discard posix.close(f.fd)
|
||||
f.fd = -1
|
||||
|
||||
proc sync*(f: DirectFile): BResult[void] =
|
||||
when defined(posix):
|
||||
if fsync(f.fd) < 0:
|
||||
return err(ioError("Failed to sync file"))
|
||||
ok()
|
||||
else:
|
||||
ok()
|
||||
|
||||
proc writeBlockDirect*(path: string, data: openArray[byte]): BResult[void] =
|
||||
let parentPath = parentDir(path)
|
||||
if parentPath.len > 0:
|
||||
try:
|
||||
createDir(parentPath)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create directory: " & e.msg))
|
||||
|
||||
let fileResult = openForWrite(path)
|
||||
if fileResult.isErr:
|
||||
return err(fileResult.error)
|
||||
|
||||
let f = fileResult.value
|
||||
defer: f.close()
|
||||
|
||||
let alignedSize = alignUp(data.len)
|
||||
var buf = newAlignedBuffer(alignedSize)
|
||||
defer: buf.free()
|
||||
|
||||
buf.copyFrom(data)
|
||||
|
||||
let writeResult = f.writeAligned(buf)
|
||||
if writeResult.isErr:
|
||||
return err(writeResult.error)
|
||||
|
||||
let truncResult = f.truncateFile(data.len.int64)
|
||||
if truncResult.isErr:
|
||||
return err(truncResult.error)
|
||||
|
||||
when defined(macosx):
|
||||
let syncResult = f.sync()
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
|
||||
ok()
|
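# Minimal usage sketch, assuming a Linux or macOS host where the direct-I/O
# paths above are available; the file path and payload are made up.
when isMainModule:
  let res = writeBlockDirect("/tmp/blockstore-directio-demo.bin",
                             cast[seq[byte]]("aligned payload"))
  if res.isErr:
    echo "direct write failed: ", $res.error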
||||
90
blockstore/errors.nim
Normal file
@ -0,0 +1,90 @@
|
||||
import std/strformat
|
||||
import results
|
||||
|
||||
type
|
||||
BlockstoreErrorKind* = enum
|
||||
IoError = "IO error"
|
||||
SerializationError = "Serialization error"
|
||||
DeserializationError = "Deserialization error"
|
||||
CidError = "CID error"
|
||||
MultihashError = "Multihash error"
|
||||
DatabaseError = "Database error"
|
||||
InvalidBlock = "Invalid block data"
|
||||
BlockNotFound = "Block not found"
|
||||
MerkleTreeError = "Merkle tree error"
|
||||
DatasetNotFound = "Dataset not found"
|
||||
QuotaExceeded = "Quota exceeded"
|
||||
InvalidProof = "Invalid merkle proof"
|
||||
InvalidProofHashLength = "Invalid merkle proof hash length"
|
||||
ManifestEncodingError = "Manifest encoding error"
|
||||
ManifestDecodingError = "Manifest decoding error"
|
||||
BackendMismatch = "Backend mismatch"
|
||||
InvalidOperation = "Invalid operation"
|
||||
|
||||
BlockstoreError* = object
|
||||
kind*: BlockstoreErrorKind
|
||||
msg*: string
|
||||
|
||||
type
|
||||
BlockstoreResult*[T] = Result[T, BlockstoreError]
|
||||
BResult*[T] = BlockstoreResult[T]
|
||||
|
||||
proc newBlockstoreError*(kind: BlockstoreErrorKind, msg: string = ""): BlockstoreError =
|
||||
BlockstoreError(kind: kind, msg: msg)
|
||||
|
||||
proc ioError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(IoError, msg)
|
||||
|
||||
proc serializationError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(SerializationError, msg)
|
||||
|
||||
proc deserializationError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(DeserializationError, msg)
|
||||
|
||||
proc cidError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(CidError, msg)
|
||||
|
||||
proc multihashError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(MultihashError, msg)
|
||||
|
||||
proc databaseError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(DatabaseError, msg)
|
||||
|
||||
proc invalidBlockError*(): BlockstoreError =
|
||||
newBlockstoreError(InvalidBlock)
|
||||
|
||||
proc blockNotFoundError*(cid: string): BlockstoreError =
|
||||
newBlockstoreError(BlockNotFound, cid)
|
||||
|
||||
proc merkleTreeError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(MerkleTreeError, msg)
|
||||
|
||||
proc datasetNotFoundError*(): BlockstoreError =
|
||||
newBlockstoreError(DatasetNotFound)
|
||||
|
||||
proc quotaExceededError*(): BlockstoreError =
|
||||
newBlockstoreError(QuotaExceeded)
|
||||
|
||||
proc invalidProofError*(): BlockstoreError =
|
||||
newBlockstoreError(InvalidProof)
|
||||
|
||||
proc invalidProofHashLengthError*(): BlockstoreError =
|
||||
newBlockstoreError(InvalidProofHashLength)
|
||||
|
||||
proc manifestEncodingError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(ManifestEncodingError, msg)
|
||||
|
||||
proc manifestDecodingError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(ManifestDecodingError, msg)
|
||||
|
||||
proc backendMismatchError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(BackendMismatch, msg)
|
||||
|
||||
proc invalidOperationError*(msg: string): BlockstoreError =
|
||||
newBlockstoreError(InvalidOperation, msg)
|
||||
|
||||
proc `$`*(e: BlockstoreError): string =
|
||||
if e.msg.len > 0:
|
||||
fmt"{e.kind}: {e.msg}"
|
||||
else:
|
||||
$e.kind
|
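# Example (illustrative): $ioError("disk full") renders as "IO error: disk full",
# while $datasetNotFoundError() renders as just "Dataset not found".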
||||
287
blockstore/ioutils.nim
Normal file
@ -0,0 +1,287 @@
|
||||
import std/os
|
||||
import results
|
||||
import ./errors
|
||||
import ./directio
|
||||
|
||||
when defined(posix):
  import std/posix
when defined(windows):
  import std/winlean
|
||||
|
||||
export PageSize, MinChunkSize, isPowerOfTwo, alignUp
|
||||
export AlignedBuffer, newAlignedBuffer, free, copyFrom, clear
|
||||
|
||||
type
|
||||
IOMode* = enum
|
||||
ioDirect
|
||||
ioBuffered
|
||||
|
||||
SyncPolicyKind* = enum
|
||||
spNone
|
||||
spEveryWrite
|
||||
spEveryN
|
||||
|
||||
SyncPolicy* = object
|
||||
case kind*: SyncPolicyKind
|
||||
of spNone: discard
|
||||
of spEveryWrite: discard
|
||||
of spEveryN: n*: int
|
||||
|
||||
WriteHandle* = ref object
|
||||
case mode*: IOMode
|
||||
of ioDirect:
|
||||
directFile: DirectFile
|
||||
alignedBuf: AlignedBuffer
|
||||
of ioBuffered:
|
||||
file: File
|
||||
path: string
|
||||
offset: int64
|
||||
chunkSize: int
|
||||
syncPolicy: SyncPolicy
|
||||
writeCount: int
|
||||
|
||||
proc syncNone*(): SyncPolicy =
|
||||
SyncPolicy(kind: spNone)
|
||||
|
||||
proc syncEveryWrite*(): SyncPolicy =
|
||||
SyncPolicy(kind: spEveryWrite)
|
||||
|
||||
proc syncEveryN*(n: int): SyncPolicy =
|
||||
SyncPolicy(kind: spEveryN, n: n)
|
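# Example (illustrative): syncEveryN(64) makes writeBlock below fsync once per
# 64 writes, syncEveryWrite() fsyncs after every block, and syncNone() never
# fsyncs from this module.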
||||
|
||||
proc openForWrite*(path: string, mode: IOMode, chunkSize: int,
|
||||
syncPolicy: SyncPolicy = syncNone()): BResult[WriteHandle] =
|
||||
let parentPath = parentDir(path)
|
||||
if parentPath.len > 0:
|
||||
try:
|
||||
createDir(parentPath)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create directory: " & e.msg))
|
||||
|
||||
case mode
|
||||
of ioDirect:
|
||||
let dfResult = directio.openForWrite(path)
|
||||
if dfResult.isErr:
|
||||
return err(dfResult.error)
|
||||
|
||||
let alignedSize = alignUp(chunkSize)
|
||||
var buf = newAlignedBuffer(alignedSize)
|
||||
|
||||
ok(WriteHandle(
|
||||
mode: ioDirect,
|
||||
directFile: dfResult.value,
|
||||
alignedBuf: buf,
|
||||
path: path,
|
||||
offset: 0,
|
||||
chunkSize: chunkSize,
|
||||
syncPolicy: syncPolicy,
|
||||
writeCount: 0
|
||||
))
|
||||
|
||||
of ioBuffered:
|
||||
try:
|
||||
let f = open(path, fmWrite)
|
||||
ok(WriteHandle(
|
||||
mode: ioBuffered,
|
||||
file: f,
|
||||
path: path,
|
||||
offset: 0,
|
||||
chunkSize: chunkSize,
|
||||
syncPolicy: syncPolicy,
|
||||
writeCount: 0
|
||||
))
|
||||
except IOError as e:
|
||||
err(ioError("Failed to open file: " & e.msg))
|
||||
|
||||
proc shouldSync(h: WriteHandle): bool {.inline.} =
|
||||
case h.syncPolicy.kind
|
||||
of spNone: false
|
||||
of spEveryWrite: true
|
||||
of spEveryN: h.writeCount mod h.syncPolicy.n == 0
|
||||
|
||||
proc syncFile(h: WriteHandle): BResult[void] =
|
||||
case h.mode
|
||||
of ioDirect:
|
||||
when defined(macosx):
|
||||
let syncResult = h.directFile.sync()
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
ok()
|
||||
of ioBuffered:
|
||||
try:
|
||||
h.file.flushFile()
|
||||
when defined(posix):
|
||||
if fsync(h.file.getFileHandle().cint) < 0:
|
||||
return err(ioError("Sync failed"))
|
||||
ok()
|
||||
except IOError as e:
|
||||
err(ioError("Sync failed: " & e.msg))
|
||||
|
||||
proc writeBlock*(h: WriteHandle, data: openArray[byte]): BResult[int] =
|
||||
case h.mode
|
||||
of ioDirect:
|
||||
h.alignedBuf.copyFrom(data)
|
||||
let writeResult = h.directFile.writeAligned(h.alignedBuf)
|
||||
if writeResult.isErr:
|
||||
return err(writeResult.error)
|
||||
|
||||
h.offset += data.len.int64
|
||||
h.writeCount += 1
|
||||
|
||||
if h.shouldSync():
|
||||
let syncResult = h.syncFile()
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
|
||||
ok(data.len)
|
||||
|
||||
of ioBuffered:
|
||||
if h.syncPolicy.kind == spNone:
|
||||
let written = h.file.writeBytes(data, 0, data.len)
|
||||
h.offset += written.int64
|
||||
return ok(written)
|
||||
|
||||
try:
|
||||
let written = h.file.writeBytes(data, 0, data.len)
|
||||
if written != data.len:
|
||||
return err(ioError("Incomplete write: " & $written & "/" & $data.len))
|
||||
h.offset += written.int64
|
||||
h.writeCount += 1
|
||||
|
||||
if h.shouldSync():
|
||||
let syncResult = h.syncFile()
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
|
||||
ok(written)
|
||||
except IOError as e:
|
||||
err(ioError("Write failed: " & e.msg))
|
||||
|
||||
proc currentOffset*(h: WriteHandle): int64 {.inline.} =
|
||||
h.offset
|
||||
|
||||
proc finalize*(h: WriteHandle, actualSize: int64): BResult[void] =
|
||||
case h.mode
|
||||
of ioDirect:
|
||||
let truncResult = h.directFile.truncateFile(actualSize)
|
||||
if truncResult.isErr:
|
||||
return err(truncResult.error)
|
||||
|
||||
when defined(macosx):
|
||||
let syncResult = h.directFile.sync()
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
|
||||
ok()
|
||||
|
||||
of ioBuffered:
|
||||
try:
|
||||
h.file.flushFile()
|
||||
when defined(posix):
|
||||
if fsync(h.file.getFileHandle().cint) < 0:
|
||||
return err(ioError("Sync failed"))
|
||||
ok()
|
||||
except IOError as e:
|
||||
err(ioError("Finalize failed: " & e.msg))
|
||||
|
||||
proc close*(h: WriteHandle) =
|
||||
case h.mode
|
||||
of ioDirect:
|
||||
h.directFile.close()
|
||||
var buf = h.alignedBuf
|
||||
buf.free()
|
||||
of ioBuffered:
|
||||
try:
|
||||
h.file.close()
|
||||
except CatchableError:
|
||||
discard
|
||||
|
||||
proc writeBlockToFile*(path: string, data: openArray[byte], mode: IOMode): BResult[void] =
|
||||
let parentPath = parentDir(path)
|
||||
if parentPath.len > 0:
|
||||
try:
|
||||
createDir(parentPath)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create directory: " & e.msg))
|
||||
|
||||
case mode
|
||||
of ioDirect:
|
||||
directio.writeBlockDirect(path, data)
|
||||
|
||||
of ioBuffered:
|
||||
try:
|
||||
var f = open(path, fmWrite)
|
||||
defer: f.close()
|
||||
|
||||
let written = f.writeBytes(data, 0, data.len)
|
||||
if written != data.len:
|
||||
return err(ioError("Incomplete write"))
|
||||
|
||||
ok()
|
||||
except IOError as e:
|
||||
err(ioError("Write failed: " & e.msg))
|
||||
|
||||
proc writeBlockBuffered*(path: string, data: openArray[byte]): BResult[File] =
|
||||
let parentPath = parentDir(path)
|
||||
if parentPath.len > 0:
|
||||
try:
|
||||
createDir(parentPath)
|
||||
except OSError as e:
|
||||
return err(ioError("Failed to create directory: " & e.msg))
|
||||
|
||||
try:
|
||||
var f = open(path, fmWrite)
|
||||
let written = f.writeBytes(data, 0, data.len)
|
||||
if written != data.len:
|
||||
f.close()
|
||||
return err(ioError("Incomplete write"))
|
||||
ok(f)
|
||||
except IOError as e:
|
||||
err(ioError("Write failed: " & e.msg))
|
||||
|
||||
proc syncAndCloseFile*(f: File): BResult[void] =
|
||||
try:
|
||||
f.flushFile()
|
||||
when defined(posix):
|
||||
if fsync(f.getFileHandle().cint) < 0:
|
||||
f.close()
|
||||
return err(ioError("Sync failed"))
|
||||
when defined(windows):
  # Assumes winlean's get_osfhandle/flushFileBuffers: convert the C runtime
  # descriptor from getFileHandle() to a Win32 HANDLE before flushing.
  if flushFileBuffers(get_osfhandle(f.getFileHandle())) == 0:
    f.close()
    return err(ioError("Sync failed"))
|
||||
f.close()
|
||||
ok()
|
||||
except IOError as e:
|
||||
try: f.close()
|
||||
except CatchableError: discard
|
||||
err(ioError("Sync failed: " & e.msg))
|
||||
|
||||
proc validateChunkSize*(chunkSize: uint32): BResult[void] =
|
||||
if chunkSize < PageSize.uint32:
|
||||
return err(ioError("Chunk size must be >= " & $PageSize & " bytes"))
|
||||
if not isPowerOfTwo(chunkSize):
|
||||
return err(ioError("Chunk size must be power of 2"))
|
||||
ok()
|
||||
|
||||
proc syncDir*(dirPath: string): BResult[void] =
|
||||
when defined(posix):
|
||||
let fd = posix.open(dirPath.cstring, O_RDONLY)
|
||||
if fd < 0:
|
||||
return err(ioError("Failed to open directory for sync: " & dirPath))
|
||||
if fsync(fd) < 0:
|
||||
discard posix.close(fd)
|
||||
return err(ioError("Failed to sync directory: " & dirPath))
|
||||
discard posix.close(fd)
|
||||
ok()
|
||||
else:
|
||||
ok()
|
||||
|
||||
proc atomicRename*(srcPath: string, dstPath: string): BResult[void] =
|
||||
try:
|
||||
moveFile(srcPath, dstPath)
|
||||
?syncDir(parentDir(dstPath))
|
||||
ok()
|
||||
except OSError as e:
|
||||
err(ioError("Failed to rename file: " & e.msg))
|
||||
except Exception as e:
|
||||
err(ioError("Failed to rename file: " & e.msg))
|
||||
165
blockstore/manifest.nim
Normal file
@ -0,0 +1,165 @@
|
||||
import std/[math, streams, options, strutils]
|
||||
import results
|
||||
import libp2p/multicodec
|
||||
import ./errors
|
||||
import ./cid
|
||||
import ./sha256
|
||||
import ./serialization as ser
|
||||
|
||||
const
|
||||
LogosStorageBlockCodec* = 0xCD02'u32
|
||||
Sha256Hcodec* = 0x12'u32
|
||||
CidVersionV1* = 1'u8
|
||||
|
||||
type
|
||||
Manifest* = object
|
||||
treeCid*: seq[byte]
|
||||
blockSize*: uint32
|
||||
datasetSize*: uint64
|
||||
codec*: uint32
|
||||
hcodec*: uint32
|
||||
version*: uint8
|
||||
filename*: Option[string]
|
||||
mimetype*: Option[string]
|
||||
|
||||
proc newManifest*(treeCid: seq[byte], blockSize: uint32, datasetSize: uint64): Manifest =
|
||||
Manifest(
|
||||
treeCid: treeCid,
|
||||
blockSize: blockSize,
|
||||
datasetSize: datasetSize,
|
||||
codec: LogosStorageBlockCodec,
|
||||
hcodec: Sha256Hcodec,
|
||||
version: CidVersionV1,
|
||||
filename: none(string),
|
||||
mimetype: none(string)
|
||||
)
|
||||
|
||||
proc newManifest*(
|
||||
treeCid: seq[byte],
|
||||
blockSize: uint32,
|
||||
datasetSize: uint64,
|
||||
filename: Option[string],
|
||||
mimetype: Option[string]
|
||||
): Manifest =
|
||||
Manifest(
|
||||
treeCid: treeCid,
|
||||
blockSize: blockSize,
|
||||
datasetSize: datasetSize,
|
||||
codec: LogosStorageBlockCodec,
|
||||
hcodec: Sha256Hcodec,
|
||||
version: CidVersionV1,
|
||||
filename: filename,
|
||||
mimetype: mimetype
|
||||
)
|
||||
|
||||
proc blocksCount*(m: Manifest): int =
|
||||
int(ceilDiv(m.datasetSize, m.blockSize.uint64))
|
||||
|
||||
proc validate*(m: Manifest): BResult[void] =
|
||||
if m.treeCid.len == 0:
|
||||
return err(manifestDecodingError("tree_cid cannot be empty"))
|
||||
|
||||
if m.blockSize == 0:
|
||||
return err(manifestDecodingError("block_size must be greater than 0"))
|
||||
|
||||
if m.codec != LogosStorageBlockCodec:
|
||||
return err(manifestDecodingError(
|
||||
"invalid codec: expected 0xCD02, got 0x" & m.codec.toHex
|
||||
))
|
||||
|
||||
if m.hcodec != Sha256Hcodec:
|
||||
return err(manifestDecodingError(
|
||||
"invalid hcodec: expected 0x12 (sha2-256), got 0x" & m.hcodec.toHex
|
||||
))
|
||||
|
||||
if m.version != CidVersionV1:
|
||||
return err(manifestDecodingError(
|
||||
"invalid version: expected 1, got " & $m.version
|
||||
))
|
||||
|
||||
ok()
|
||||
|
||||
proc encodeManifest*(m: Manifest): BResult[seq[byte]] =
|
||||
try:
|
||||
let s = newStringStream()
|
||||
|
||||
ser.writeBytes(s, m.treeCid)
|
||||
ser.writeUint32(s, m.blockSize)
|
||||
ser.writeUint64(s, m.datasetSize)
|
||||
ser.writeUint32(s, m.codec)
|
||||
ser.writeUint32(s, m.hcodec)
|
||||
ser.writeUint8(s, m.version)
|
||||
|
||||
if m.filename.isSome:
|
||||
ser.writeBool(s, true)
|
||||
ser.writeString(s, m.filename.get)
|
||||
else:
|
||||
ser.writeBool(s, false)
|
||||
|
||||
if m.mimetype.isSome:
|
||||
ser.writeBool(s, true)
|
||||
ser.writeString(s, m.mimetype.get)
|
||||
else:
|
||||
ser.writeBool(s, false)
|
||||
|
||||
s.setPosition(0)
|
||||
ok(cast[seq[byte]](s.readAll()))
|
||||
except CatchableError as e:
|
||||
err(manifestEncodingError(e.msg))
|
||||
|
||||
proc decodeManifest*(data: openArray[byte]): BResult[Manifest] =
|
||||
try:
|
||||
let dataCopy = @data
|
||||
let s = newStringStream(cast[string](dataCopy))
|
||||
|
||||
var m: Manifest
|
||||
m.treeCid = ?ser.readBytes(s)
|
||||
m.blockSize = ser.readUint32(s)
|
||||
m.datasetSize = ser.readUint64(s)
|
||||
m.codec = ser.readUint32(s)
|
||||
m.hcodec = ser.readUint32(s)
|
||||
m.version = ser.readUint8(s)
|
||||
|
||||
if ser.readBool(s):
|
||||
m.filename = some(?ser.readString(s))
|
||||
else:
|
||||
m.filename = none(string)
|
||||
|
||||
if ser.readBool(s):
|
||||
m.mimetype = some(?ser.readString(s))
|
||||
else:
|
||||
m.mimetype = none(string)
|
||||
|
||||
ok(m)
|
||||
except CatchableError as e:
|
||||
err(manifestDecodingError(e.msg))
|
||||
|
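# Round-trip sketch (illustrative, not from the original source; someTreeCidBytes
# stands in for a real CIDv1 byte buffer):
#   let m = newManifest(someTreeCidBytes, blockSize = 65536'u32,
#                       datasetSize = 1_000_000'u64)
#   doAssert decodeManifest(encodeManifest(m).get()).get().datasetSize == m.datasetSize
# encodeManifest and decodeManifest are intended to be exact inverses of each other.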
||||
proc toCid*(m: Manifest): BResult[Cid] =
|
||||
let encoded = ?encodeManifest(m)
|
||||
let hash = sha256Hash(encoded)
|
||||
let mh = ?wrap(Sha256Code, hash)
|
||||
newCidV1(LogosStorageManifest, mh)
|
||||
|
||||
proc fromCidData*(c: Cid, data: openArray[byte]): BResult[Manifest] =
|
||||
if c.mcodec != LogosStorageManifest:
|
||||
return err(cidError(
|
||||
"Expected manifest codec 0xCD01, got 0x" & int(c.mcodec).toHex
|
||||
))
|
||||
|
||||
let
|
||||
manifest = ?decodeManifest(data)
|
||||
computedCid = ?manifest.toCid()
|
||||
|
||||
if computedCid != c:
|
||||
return err(cidError("Manifest CID mismatch"))
|
||||
|
||||
ok(manifest)
|
||||
|
||||
proc `$`*(m: Manifest): string =
|
||||
result = "Manifest("
|
||||
result.add("blockSize=" & $m.blockSize)
|
||||
result.add(", datasetSize=" & $m.datasetSize)
|
||||
result.add(", blocks=" & $m.blocksCount)
|
||||
if m.filename.isSome:
|
||||
result.add(", filename=" & m.filename.get)
|
||||
result.add(")")
|
||||
692
blockstore/merkle.nim
Normal file
@ -0,0 +1,692 @@
|
||||
import std/[sets, options, memfiles, bitops, os, syncio]
|
||||
when defined(posix):
|
||||
import std/posix
|
||||
import results
|
||||
import libp2p/multicodec
|
||||
import leveldbstatic as leveldb
|
||||
import ./errors
|
||||
import ./cid
|
||||
import ./sha256
|
||||
|
||||
const
|
||||
HashSize* = 32
|
||||
MerkleTreePrefix* = "merkle:"
|
||||
MetadataKey = ":meta"
|
||||
|
||||
PackedMagic = 0x534B4C4D'u32
|
||||
PackedVersion = 4'u32
|
||||
HeaderSize = 17
|
||||
EntrySize = 32
|
||||
|
||||
type
|
||||
MerkleBackend* = enum
|
||||
mbEmbeddedProofs
|
||||
mbLevelDb
|
||||
mbPacked
|
||||
|
||||
MerkleHash* = array[HashSize, byte]
|
||||
|
||||
MerkleStorage* = ref object of RootObj
|
||||
|
||||
LevelDbMerkleStorage* = ref object of MerkleStorage
|
||||
db: LevelDb
|
||||
treeId: string
|
||||
|
||||
PackedMerkleStorage* = ref object of MerkleStorage
|
||||
path: string
|
||||
file: File
|
||||
memFile: MemFile
|
||||
leafCount: uint64
|
||||
numLevels: int
|
||||
levelFiles: seq[File]
|
||||
readOnly: bool
|
||||
|
||||
StreamingMerkleBuilder* = ref object
|
||||
frontier: seq[Option[MerkleHash]]
|
||||
pendingIndices: seq[uint64]
|
||||
leafCount: uint64
|
||||
storage: MerkleStorage
|
||||
|
||||
MerkleReader* = ref object
|
||||
storage: MerkleStorage
|
||||
|
||||
MerkleProofNode* = object
|
||||
hash*: MerkleHash
|
||||
level*: int
|
||||
|
||||
MerkleProof* = object
|
||||
index*: uint64
|
||||
path*: seq[MerkleProofNode]
|
||||
leafCount*: uint64
|
||||
|
||||
MerkleTreeBuilder* = ref object
|
||||
leaves: seq[array[32, byte]]
|
||||
tree: seq[seq[array[32, byte]]]
|
||||
built: bool
|
||||
|
||||
method putHash*(s: MerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.base, raises: [].}
|
||||
method getHash*(s: MerkleStorage, level: int, index: uint64): Option[MerkleHash] {.base, raises: [].}
|
||||
method setMetadata*(s: MerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.base, raises: [].}
|
||||
method getMetadata*(s: MerkleStorage): tuple[leafCount: uint64, numLevels: int] {.base, raises: [].}
|
||||
method close*(s: MerkleStorage): BResult[void] {.base, gcsafe, raises: [].}
|
||||
method flush*(s: MerkleStorage) {.base, gcsafe, raises: [].}
|
||||
|
||||
proc computeNumLevels*(leafCount: uint64): int =
|
||||
if leafCount == 0: return 0
|
||||
if leafCount == 1: return 1
|
||||
fastLog2(leafCount - 1) + 2
|
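# Example (illustrative): computeNumLevels(1) == 1 and computeNumLevels(5) == 4,
# since 5 leaves collapse through levels of 5, 3, 2 and finally 1 root node.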
||||
|
||||
proc nodesAtLevel*(leafCount: uint64, level: int): uint64 =
|
||||
if leafCount == 0: return 0
|
||||
if level == 0: return leafCount
|
||||
if level >= 64:
|
||||
return if level == 64: 1 else: 0
|
||||
let
|
||||
shifted = leafCount shr level
|
||||
mask = (1'u64 shl level) - 1
|
||||
if (leafCount and mask) > 0: shifted + 1 else: shifted
|
||||
|
||||
proc nodesBeforeLevel*(leafCount: uint64, level: int): uint64 =
|
||||
result = 0
|
||||
for l in 0 ..< level:
|
||||
result += nodesAtLevel(leafCount, l)
|
||||
|
||||
proc nodePosition*(leafCount: uint64, level: int, index: uint64): uint64 =
|
||||
nodesBeforeLevel(leafCount, level) + index
|
||||
|
||||
proc hashConcat*(left, right: MerkleHash): MerkleHash =
|
||||
var combined: array[64, byte]
|
||||
copyMem(addr combined[0], unsafeAddr left[0], 32)
|
||||
copyMem(addr combined[32], unsafeAddr right[0], 32)
|
||||
sha256Hash(combined)
|
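# hashConcat(left, right) is SHA-256 over the 64-byte concatenation left || right;
# the builders below use it to fold sibling pairs into their parent node.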
||||
|
||||
method putHash*(s: MerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.base, raises: [].} =
|
||||
err(ioError("putHash not implemented"))
|
||||
|
||||
method getHash*(s: MerkleStorage, level: int, index: uint64): Option[MerkleHash] {.base, raises: [].} =
|
||||
none(MerkleHash)
|
||||
|
||||
method setMetadata*(s: MerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.base, raises: [].} =
|
||||
err(ioError("setMetadata not implemented"))
|
||||
|
||||
method getMetadata*(s: MerkleStorage): tuple[leafCount: uint64, numLevels: int] {.base, raises: [].} =
|
||||
(0'u64, 0)
|
||||
|
||||
method close*(s: MerkleStorage): BResult[void] {.base, gcsafe, raises: [].} =
|
||||
ok()
|
||||
|
||||
method flush*(s: MerkleStorage) {.base, gcsafe, raises: [].} =
|
||||
discard
|
||||
|
||||
proc levelDbKey(treeId: string, level: int, index: uint64): string =
|
||||
MerkleTreePrefix & treeId & ":L" & $level & ":I" & $index
|
||||
|
||||
proc levelDbMetaKey(treeId: string): string =
|
||||
MerkleTreePrefix & treeId & MetadataKey
|
||||
|
||||
proc newLevelDbMerkleStorage*(db: LevelDb, treeId: string): LevelDbMerkleStorage =
|
||||
LevelDbMerkleStorage(db: db, treeId: treeId)
|
||||
|
||||
method putHash*(s: LevelDbMerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.raises: [].} =
|
||||
let key = levelDbKey(s.treeId, level, index)
|
||||
try:
|
||||
s.db.put(key, cast[string](@hash))
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(databaseError(e.msg))
|
||||
except Exception as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
method getHash*(s: LevelDbMerkleStorage, level: int, index: uint64): Option[MerkleHash] {.raises: [].} =
|
||||
let key = levelDbKey(s.treeId, level, index)
|
||||
try:
|
||||
let valueOpt = s.db.get(key)
|
||||
if valueOpt.isNone or valueOpt.get.len != HashSize:
|
||||
return none(MerkleHash)
|
||||
var hash: MerkleHash
|
||||
copyMem(addr hash[0], unsafeAddr valueOpt.get[0], HashSize)
|
||||
some(hash)
|
||||
except CatchableError:
|
||||
none(MerkleHash)
|
||||
except Exception:
|
||||
none(MerkleHash)
|
||||
|
||||
method setMetadata*(s: LevelDbMerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.raises: [].} =
|
||||
let key = levelDbMetaKey(s.treeId)
|
||||
var data: array[9, byte]
|
||||
copyMem(addr data[0], unsafeAddr leafCount, 8)
|
||||
var nl = numLevels.uint8
|
||||
copyMem(addr data[8], unsafeAddr nl, 1)
|
||||
try:
|
||||
s.db.put(key, cast[string](@data))
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(databaseError(e.msg))
|
||||
except Exception as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
method getMetadata*(s: LevelDbMerkleStorage): tuple[leafCount: uint64, numLevels: int] {.raises: [].} =
|
||||
let key = levelDbMetaKey(s.treeId)
|
||||
try:
|
||||
let valueOpt = s.db.get(key)
|
||||
if valueOpt.isNone or valueOpt.get.len < 9:
|
||||
return (0'u64, 0)
|
||||
var
|
||||
leafCount: uint64
|
||||
numLevels: uint8
|
||||
copyMem(addr leafCount, unsafeAddr valueOpt.get[0], 8)
|
||||
copyMem(addr numLevels, unsafeAddr valueOpt.get[8], 1)
|
||||
(leafCount, numLevels.int)
|
||||
except CatchableError:
|
||||
(0'u64, 0)
|
||||
except Exception:
|
||||
(0'u64, 0)
|
||||
|
||||
proc levelTempPath(basePath: string, level: int): string =
|
||||
basePath & ".L" & $level & ".tmp"
|
||||
|
||||
proc newPackedMerkleStorage*(path: string, forWriting: bool = false): BResult[PackedMerkleStorage] =
|
||||
var storage = PackedMerkleStorage(
|
||||
path: path,
|
||||
levelFiles: @[]
|
||||
)
|
||||
|
||||
if forWriting:
|
||||
storage.readOnly = false
|
||||
storage.leafCount = 0
|
||||
storage.numLevels = 0
|
||||
storage.file = syncio.open(path, fmReadWrite)
|
||||
|
||||
var header: array[HeaderSize, byte]
|
||||
var magic = PackedMagic
|
||||
var version = PackedVersion
|
||||
copyMem(addr header[0], addr magic, 4)
|
||||
copyMem(addr header[4], addr version, 4)
|
||||
let written = storage.file.writeBuffer(addr header[0], HeaderSize)
|
||||
if written != HeaderSize:
|
||||
storage.file.close()
|
||||
return err(ioError("Failed to write packed merkle header"))
|
||||
else:
|
||||
storage.readOnly = true
|
||||
storage.memFile = memfiles.open(path, mode = fmRead)
|
||||
|
||||
let
|
||||
data = cast[ptr UncheckedArray[byte]](storage.memFile.mem)
|
||||
fileSize = storage.memFile.size
|
||||
|
||||
if fileSize < HeaderSize:
|
||||
storage.memFile.close()
|
||||
return err(ioError("File too small for header"))
|
||||
|
||||
var
|
||||
magic: uint32
|
||||
version: uint32
|
||||
nl: uint8
|
||||
copyMem(addr magic, addr data[0], 4)
|
||||
copyMem(addr version, addr data[4], 4)
|
||||
copyMem(addr storage.leafCount, addr data[8], 8)
|
||||
copyMem(addr nl, addr data[16], 1)
|
||||
storage.numLevels = nl.int
|
||||
|
||||
if magic != PackedMagic:
|
||||
storage.memFile.close()
|
||||
return err(ioError("Invalid packed merkle file magic"))
|
||||
if version != PackedVersion:
|
||||
storage.memFile.close()
|
||||
return err(ioError("Unsupported packed merkle file version: " & $version))
|
||||
|
||||
ok(storage)
|
||||
|
||||
method putHash*(s: PackedMerkleStorage, level: int, index: uint64, hash: MerkleHash): BResult[void] {.raises: [].} =
|
||||
if s.readOnly:
|
||||
return err(ioError("Storage is read-only"))
|
||||
|
||||
try:
|
||||
if level == 0:
|
||||
let offset = HeaderSize + index.int64 * EntrySize
|
||||
s.file.setFilePos(offset)
|
||||
let written = s.file.writeBuffer(unsafeAddr hash[0], HashSize)
|
||||
if written != HashSize:
|
||||
return err(ioError("Failed to write hash at level 0"))
|
||||
else:
|
||||
while s.levelFiles.len < level:
|
||||
let tempPath = levelTempPath(s.path, s.levelFiles.len + 1)
|
||||
s.levelFiles.add(syncio.open(tempPath, fmReadWrite))
|
||||
let f = s.levelFiles[level - 1]
|
||||
f.setFilePos(index.int64 * HashSize)
|
||||
let written = f.writeBuffer(unsafeAddr hash[0], HashSize)
|
||||
if written != HashSize:
|
||||
return err(ioError("Failed to write hash at level " & $level))
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(ioError(e.msg))
|
||||
except Exception as e:
|
||||
err(ioError(e.msg))
|
||||
|
||||
method getHash*(s: PackedMerkleStorage, level: int, index: uint64): Option[MerkleHash] {.raises: [].} =
|
||||
if s.leafCount == 0:
|
||||
return none(MerkleHash)
|
||||
|
||||
if index >= nodesAtLevel(s.leafCount, level):
|
||||
return none(MerkleHash)
|
||||
|
||||
let
|
||||
position = nodePosition(s.leafCount, level, index)
|
||||
offset = HeaderSize + position.int64 * EntrySize
|
||||
|
||||
try:
|
||||
if s.readOnly:
|
||||
if offset + HashSize > s.memFile.size:
|
||||
return none(MerkleHash)
|
||||
let data = cast[ptr UncheckedArray[byte]](s.memFile.mem)
|
||||
var hash: MerkleHash
|
||||
copyMem(addr hash[0], addr data[offset], HashSize)
|
||||
return some(hash)
|
||||
else:
|
||||
s.file.setFilePos(offset)
|
||||
var hash: MerkleHash
|
||||
let bytesRead = s.file.readBuffer(addr hash[0], HashSize)
|
||||
if bytesRead != HashSize:
|
||||
return none(MerkleHash)
|
||||
return some(hash)
|
||||
except CatchableError:
|
||||
none(MerkleHash)
|
||||
except Exception:
|
||||
none(MerkleHash)
|
||||
|
||||
method setMetadata*(s: PackedMerkleStorage, leafCount: uint64, numLevels: int): BResult[void] {.raises: [].} =
|
||||
if s.readOnly:
|
||||
return ok()
|
||||
s.leafCount = leafCount
|
||||
s.numLevels = numLevels
|
||||
|
||||
try:
|
||||
s.file.setFilePos(8)
|
||||
var
|
||||
lc = leafCount
|
||||
nl = numLevels.uint8
|
||||
var written = s.file.writeBuffer(addr lc, 8)
|
||||
if written != 8:
|
||||
return err(ioError("Failed to write leaf count"))
|
||||
written = s.file.writeBuffer(addr nl, 1)
|
||||
if written != 1:
|
||||
return err(ioError("Failed to write num levels"))
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(ioError(e.msg))
|
||||
except Exception as e:
|
||||
err(ioError(e.msg))
|
||||
|
||||
method getMetadata*(s: PackedMerkleStorage): tuple[leafCount: uint64, numLevels: int] {.raises: [].} =
|
||||
(s.leafCount, s.numLevels)
|
||||
|
||||
method close*(s: PackedMerkleStorage): BResult[void] {.gcsafe, raises: [].} =
|
||||
try:
|
||||
if s.readOnly:
|
||||
s.memFile.close()
|
||||
else:
|
||||
s.file.setFilePos(0, fspEnd)
|
||||
var buffer: array[4096, byte]
|
||||
for i, levelFile in s.levelFiles:
|
||||
flushFile(levelFile)
|
||||
levelFile.setFilePos(0)
|
||||
while true:
|
||||
let bytesRead = levelFile.readBuffer(addr buffer[0], buffer.len)
|
||||
if bytesRead == 0:
|
||||
break
|
||||
let written = s.file.writeBuffer(addr buffer[0], bytesRead)
|
||||
if written != bytesRead:
|
||||
return err(ioError("Failed to write level " & $(i + 1) & " data"))
|
||||
levelFile.close()
|
||||
removeFile(levelTempPath(s.path, i + 1))
|
||||
s.levelFiles = @[]
|
||||
|
||||
flushFile(s.file)
|
||||
when defined(posix):
|
||||
if fsync(s.file.getFileHandle().cint) != 0:
|
||||
return err(ioError("fsync failed"))
|
||||
s.file.close()
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(ioError(e.msg))
|
||||
except Exception as e:
|
||||
err(ioError(e.msg))
|
||||
|
||||
method flush*(s: PackedMerkleStorage) {.gcsafe.} =
|
||||
if not s.readOnly:
|
||||
flushFile(s.file)
|
||||
for levelFile in s.levelFiles:
|
||||
flushFile(levelFile)
|
||||
|
||||
proc newStreamingMerkleBuilder*(storage: MerkleStorage): StreamingMerkleBuilder =
|
||||
StreamingMerkleBuilder(
|
||||
frontier: @[],
|
||||
pendingIndices: @[],
|
||||
leafCount: 0,
|
||||
storage: storage
|
||||
)
|
||||
|
||||
proc addLeaf*(builder: StreamingMerkleBuilder, hash: MerkleHash): BResult[void] {.raises: [].} =
|
||||
try:
|
||||
var
|
||||
current = hash
|
||||
level = 0
|
||||
index = builder.leafCount
|
||||
|
||||
while builder.frontier.len <= level:
|
||||
builder.frontier.add(none(MerkleHash))
|
||||
builder.pendingIndices.add(0'u64)
|
||||
|
||||
while level < builder.frontier.len and builder.frontier[level].isSome:
|
||||
let
|
||||
sibling = builder.frontier[level].get()
|
||||
siblingIndex = builder.pendingIndices[level]
|
||||
|
||||
let r1 = builder.storage.putHash(level, siblingIndex, sibling)
|
||||
if r1.isErr:
|
||||
return err(r1.error)
|
||||
let r2 = builder.storage.putHash(level, siblingIndex + 1, current)
|
||||
if r2.isErr:
|
||||
return err(r2.error)
|
||||
|
||||
current = hashConcat(sibling, current)
|
||||
builder.frontier[level] = none(MerkleHash)
|
||||
|
||||
level += 1
|
||||
index = index shr 1
|
||||
|
||||
while builder.frontier.len <= level:
|
||||
builder.frontier.add(none(MerkleHash))
|
||||
builder.pendingIndices.add(0'u64)
|
||||
|
||||
builder.frontier[level] = some(current)
|
||||
builder.pendingIndices[level] = index
|
||||
builder.leafCount += 1
|
||||
ok()
|
||||
except CatchableError as e:
|
||||
err(ioError(e.msg))
|
||||
except Exception as e:
|
||||
err(ioError(e.msg))
|
||||
|
||||
proc finalize*(builder: StreamingMerkleBuilder): BResult[MerkleHash] {.raises: [].} =
|
||||
if builder.leafCount == 0:
|
||||
return err(merkleTreeError("Cannot finalize empty tree"))
|
||||
|
||||
let numLevels = computeNumLevels(builder.leafCount)
|
||||
|
||||
var
|
||||
current: Option[MerkleHash] = none(MerkleHash)
|
||||
currentIndex: uint64 = 0
|
||||
currentLevel: int = 0
|
||||
|
||||
for level in 0 ..< builder.frontier.len:
|
||||
if builder.frontier[level].isSome:
|
||||
let
|
||||
hash = builder.frontier[level].get()
|
||||
index = builder.pendingIndices[level]
|
||||
|
||||
?builder.storage.putHash(level, index, hash)
|
||||
|
||||
if current.isNone:
|
||||
current = some(hash)
|
||||
currentIndex = index
|
||||
currentLevel = level
|
||||
else:
|
||||
while currentLevel < level:
|
||||
currentLevel += 1
|
||||
currentIndex = currentIndex shr 1
|
||||
?builder.storage.putHash(currentLevel, currentIndex, current.get())
|
||||
|
||||
let combined = hashConcat(hash, current.get())
|
||||
current = some(combined)
|
||||
currentIndex = index shr 1
|
||||
currentLevel = level + 1
|
||||
|
||||
?builder.storage.putHash(currentLevel, currentIndex, current.get())
|
||||
|
||||
elif current.isSome and currentLevel == level:
|
||||
currentLevel += 1
|
||||
currentIndex = currentIndex shr 1
|
||||
?builder.storage.putHash(currentLevel, currentIndex, current.get())
|
||||
|
||||
if current.isNone:
|
||||
return err(merkleTreeError("Failed to compute root"))
|
||||
|
||||
?builder.storage.setMetadata(builder.leafCount, numLevels)
|
||||
?builder.storage.putHash(numLevels - 1, 0, current.get())
|
||||
builder.storage.flush()
|
||||
|
||||
ok(current.get())
|
||||
|
||||
proc leafCount*(builder: StreamingMerkleBuilder): uint64 =
|
||||
builder.leafCount
|
||||
|
||||
proc newMerkleReader*(storage: MerkleStorage): MerkleReader =
|
||||
MerkleReader(storage: storage)
|
||||
|
||||
proc close*(reader: MerkleReader) =
|
||||
if reader.storage != nil:
|
||||
discard reader.storage.close()
|
||||
|
||||
proc root*(reader: MerkleReader): Option[MerkleHash] =
|
||||
let (leafCount, numLevels) = reader.storage.getMetadata()
|
||||
if numLevels == 0:
|
||||
return none(MerkleHash)
|
||||
reader.storage.getHash(numLevels - 1, 0)
|
||||
|
||||
proc leafCount*(reader: MerkleReader): uint64 =
|
||||
reader.storage.getMetadata().leafCount
|
||||
|
||||
proc getProof*(reader: MerkleReader, index: uint64): BResult[MerkleProof] =
|
||||
let (leafCount, numLevels) = reader.storage.getMetadata()
|
||||
|
||||
if index >= leafCount:
|
||||
return err(invalidBlockError())
|
||||
|
||||
var
|
||||
path: seq[MerkleProofNode] = @[]
|
||||
idx = index
|
||||
|
||||
for level in 0 ..< numLevels - 1:
|
||||
let
|
||||
siblingIdx = idx xor 1
|
||||
maxIdx = nodesAtLevel(leafCount, level)
|
||||
|
||||
if siblingIdx < maxIdx:
|
||||
let siblingOpt = reader.storage.getHash(level, siblingIdx)
|
||||
if siblingOpt.isSome:
|
||||
path.add(MerkleProofNode(hash: siblingOpt.get(), level: level))
|
||||
|
||||
idx = idx shr 1
|
||||
|
||||
ok(MerkleProof(
|
||||
index: index,
|
||||
path: path,
|
||||
leafCount: leafCount
|
||||
))
|
||||
|
||||
proc newMerkleTreeBuilder*(): MerkleTreeBuilder =
|
||||
MerkleTreeBuilder(
|
||||
leaves: @[],
|
||||
tree: @[],
|
||||
built: false
|
||||
)
|
||||
|
||||
proc addBlock*(builder: MerkleTreeBuilder, blockData: openArray[byte]) =
|
||||
if builder.built:
|
||||
raise newException(Defect, "Cannot add blocks after tree has been built")
|
||||
builder.leaves.add(sha256Hash(blockData))
|
||||
|
||||
proc buildTree*(builder: MerkleTreeBuilder) =
|
||||
if builder.built or builder.leaves.len == 0:
|
||||
return
|
||||
|
||||
builder.tree = @[]
|
||||
builder.tree.add(builder.leaves)
|
||||
|
||||
var currentLevel = builder.leaves
|
||||
while currentLevel.len > 1:
|
||||
var
|
||||
nextLevel: seq[array[32, byte]] = @[]
|
||||
i = 0
|
||||
while i < currentLevel.len:
|
||||
if i + 1 < currentLevel.len:
|
||||
nextLevel.add(hashConcat(currentLevel[i], currentLevel[i + 1]))
|
||||
else:
|
||||
nextLevel.add(currentLevel[i])
|
||||
i += 2
|
||||
builder.tree.add(nextLevel)
|
||||
currentLevel = nextLevel
|
||||
|
||||
builder.built = true
|
||||
|
||||
proc root*(builder: MerkleTreeBuilder): Option[array[32, byte]] =
|
||||
if not builder.built or builder.tree.len == 0:
|
||||
return none(array[32, byte])
|
||||
some(builder.tree[^1][0])
|
||||
|
||||
proc rootCid*(builder: MerkleTreeBuilder): BResult[Cid] =
|
||||
if not builder.built:
|
||||
return err(merkleTreeError("Tree not built. Call buildTree() first"))
|
||||
|
||||
let rootOpt = builder.root()
|
||||
if rootOpt.isNone:
|
||||
return err(merkleTreeError("Failed to compute merkle root"))
|
||||
|
||||
let mh = ?wrap(Sha256Code, rootOpt.get())
|
||||
newCidV1(LogosStorageTree, mh)
|
||||
|
||||
proc blockCount*(builder: MerkleTreeBuilder): int =
|
||||
builder.leaves.len
|
||||
|
||||
proc getProof*(builder: MerkleTreeBuilder, index: int): BResult[MerkleProof] =
|
||||
if index < 0 or index >= builder.leaves.len:
|
||||
return err(invalidBlockError())
|
||||
|
||||
if not builder.built:
|
||||
return err(merkleTreeError("Tree not built. Call buildTree() first"))
|
||||
|
||||
var
|
||||
path: seq[MerkleProofNode] = @[]
|
||||
idx = index
|
||||
|
||||
for level in 0 ..< builder.tree.len - 1:
|
||||
let siblingIdx = if (idx mod 2) == 0: idx + 1 else: idx - 1
|
||||
if siblingIdx < builder.tree[level].len:
|
||||
path.add(MerkleProofNode(hash: builder.tree[level][siblingIdx], level: level))
|
||||
idx = idx div 2
|
||||
|
||||
ok(MerkleProof(
|
||||
index: index.uint64,
|
||||
path: path,
|
||||
leafCount: builder.leaves.len.uint64
|
||||
))
|
||||
|
||||
proc verify*(proof: MerkleProof, root: MerkleHash, leafHash: MerkleHash): bool =
|
||||
var
|
||||
current = leafHash
|
||||
idx = proof.index
|
||||
currentLevel = 0
|
||||
|
||||
for node in proof.path:
|
||||
while currentLevel < node.level:
|
||||
idx = idx shr 1
|
||||
currentLevel += 1
|
||||
|
||||
if (idx and 1) == 0:
|
||||
current = hashConcat(current, node.hash)
|
||||
else:
|
||||
current = hashConcat(node.hash, current)
|
||||
idx = idx shr 1
|
||||
currentLevel += 1
|
||||
|
||||
current == root
|
||||
|
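# Illustrative end-to-end sketch, not part of the original module; the block
# contents are made up. Build an in-memory tree, take a proof for leaf 1 and
# check it against the root.
when isMainModule:
  let builder = newMerkleTreeBuilder()
  builder.addBlock(cast[seq[byte]]("block-0"))
  builder.addBlock(cast[seq[byte]]("block-1"))
  builder.addBlock(cast[seq[byte]]("block-2"))
  builder.buildTree()
  let
    root = builder.root().get()
    proof = builder.getProof(1).get()
  doAssert proof.verify(root, sha256Hash(cast[seq[byte]]("block-1")))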
||||
proc verify*(proof: MerkleProof, root: openArray[byte], data: openArray[byte]): BResult[bool] =
|
||||
if root.len != 32:
|
||||
return err(invalidProofError())
|
||||
|
||||
var rootHash: MerkleHash
|
||||
copyMem(addr rootHash[0], unsafeAddr root[0], 32)
|
||||
|
||||
var
|
||||
currentHash = sha256Hash(data)
|
||||
idx = proof.index
|
||||
currentLevel = 0
|
||||
|
||||
for node in proof.path:
|
||||
while currentLevel < node.level:
|
||||
idx = idx shr 1
|
||||
currentLevel += 1
|
||||
|
||||
if (idx and 1) == 0:
|
||||
currentHash = hashConcat(currentHash, node.hash)
|
||||
else:
|
||||
currentHash = hashConcat(node.hash, currentHash)
|
||||
idx = idx shr 1
|
||||
currentLevel += 1
|
||||
|
||||
ok(currentHash == rootHash)
|
||||
|
||||
proc rootToCid*(root: MerkleHash, hashCode: MultiCodec, treeCodec: MultiCodec): BResult[Cid] =
|
||||
let mh = ?wrap(hashCode, root)
|
||||
newCidV1(treeCodec, mh)
|
||||
|
||||
proc rootToCid*(root: MerkleHash): BResult[Cid] =
|
||||
rootToCid(root, Sha256Code, LogosStorageTree)
|
||||
|
||||
proc collectLeavesUnderNode(nodeIdx: int, levelSize: int, totalLeaves: int, leaves: var HashSet[int])
|
||||
|
||||
proc getRequiredLeafIndices*(start: int, count: int, totalLeaves: int): HashSet[int] =
|
||||
result = initHashSet[int]()
|
||||
|
||||
var have = initHashSet[int]()
|
||||
for i in start ..< start + count:
|
||||
have.incl(i)
|
||||
|
||||
var levelSize = totalLeaves
|
||||
|
||||
while levelSize > 1:
|
||||
var
|
||||
nextHave = initHashSet[int]()
|
||||
processedPairs = initHashSet[int]()
|
||||
|
||||
for idx in have:
|
||||
let pairIdx = idx div 2
|
||||
|
||||
if pairIdx in processedPairs:
|
||||
continue
|
||||
processedPairs.incl(pairIdx)
|
||||
|
||||
let
|
||||
leftIdx = pairIdx * 2
|
||||
rightIdx = pairIdx * 2 + 1
|
||||
|
||||
haveLeft = leftIdx in have
|
||||
haveRight = rightIdx < levelSize and rightIdx in have
|
||||
|
||||
if haveLeft and not haveRight and rightIdx < levelSize:
|
||||
collectLeavesUnderNode(rightIdx, levelSize, totalLeaves, result)
|
||||
elif not haveLeft and haveRight:
|
||||
collectLeavesUnderNode(leftIdx, levelSize, totalLeaves, result)
|
||||
|
||||
nextHave.incl(pairIdx)
|
||||
|
||||
levelSize = (levelSize + 1) div 2
|
||||
have = nextHave
|
||||
|
||||
proc collectLeavesUnderNode(nodeIdx: int, levelSize: int, totalLeaves: int, leaves: var HashSet[int]) =
|
||||
var
|
||||
currentSize = levelSize
|
||||
levelsToLeaves = 0
|
||||
while currentSize < totalLeaves:
|
||||
currentSize = currentSize * 2
|
||||
inc levelsToLeaves
|
||||
|
||||
let
|
||||
leavesPerNode = 1 shl levelsToLeaves
|
||||
startLeaf = nodeIdx * leavesPerNode
|
||||
endLeaf = min((nodeIdx + 1) * leavesPerNode, totalLeaves)
|
||||
|
||||
for leafIdx in startLeaf ..< endLeaf:
|
||||
leaves.incl(leafIdx)
|
||||
6
blockstore/multicodec_exts.nim
Normal file
@ -0,0 +1,6 @@
|
||||
## LogosStorage multicodec extensions for libp2p
|
||||
const CodecExts* = @[
|
||||
("logos-storage-manifest", 0xCD01),
|
||||
("logos-storage-block", 0xCD02),
|
||||
("logos-storage-tree", 0xCD03),
|
||||
]
|
||||
436
blockstore/repostore.nim
Normal file
@ -0,0 +1,436 @@
|
||||
import std/[os, locks, atomics, strutils, times, options, tables]
|
||||
when defined(posix):
|
||||
import std/posix
|
||||
import chronos
|
||||
import chronos/asyncsync
|
||||
import leveldbstatic as leveldb
|
||||
|
||||
import ./errors
|
||||
import ./cid
|
||||
import ./blocks as blk
|
||||
import ./serialization
|
||||
import ./sharding
|
||||
import ./ioutils
|
||||
|
||||
export PendingDeletion, BlockInfo
|
||||
export IOMode
|
||||
|
||||
const
|
||||
BlockInfoPrefix = "block_info:"
|
||||
PendingDeletionsPrefix = "pending_deletions:"
|
||||
UsedKey = "repo_metadata:used"
|
||||
|
||||
type
|
||||
SyncWorker* = ref object
|
||||
mutex: Lock
|
||||
cond: Cond
|
||||
running: Atomic[bool]
|
||||
thread: Thread[pointer]
|
||||
blocksDir: string
|
||||
|
||||
CidLock* = ref object
|
||||
lock*: AsyncLock
|
||||
waiters*: int
|
||||
|
||||
RepoStore* = ref object
|
||||
blocksDir: string
|
||||
db: LevelDb
|
||||
quota: uint64
|
||||
used: Atomic[uint64]
|
||||
ioMode: IOMode
|
||||
syncBatchSize: int
|
||||
syncWorker: SyncWorker
|
||||
writeCount: Atomic[int]
|
||||
cidLocks: Table[string, CidLock]
|
||||
|
||||
when defined(linux):
|
||||
proc syncfs(fd: cint): cint {.importc, header: "<unistd.h>".}
|
||||
|
||||
proc doSync(blocksDir: string) =
|
||||
when defined(linux):
|
||||
let fd = posix.open(blocksDir.cstring, O_RDONLY)
|
||||
if fd >= 0:
|
||||
discard syncfs(fd)
|
||||
discard posix.close(fd)
|
||||
elif defined(posix):
|
||||
proc sync() {.importc, header: "<unistd.h>".}
|
||||
sync()
|
||||
else:
|
||||
discard
|
||||
|
||||
proc syncWorkerLoop(workerPtr: pointer) {.thread, nimcall.} =
|
||||
let worker = cast[SyncWorker](workerPtr)
|
||||
while true:
|
||||
acquire(worker.mutex)
|
||||
while worker.running.load():
|
||||
wait(worker.cond, worker.mutex)
|
||||
if not worker.running.load():
|
||||
break
|
||||
release(worker.mutex)
|
||||
doSync(worker.blocksDir)
|
||||
acquire(worker.mutex)
|
||||
release(worker.mutex)
|
||||
doSync(worker.blocksDir)
|
||||
break
|
||||
|
||||
proc newSyncWorker*(blocksDir: string): SyncWorker =
|
||||
result = SyncWorker(blocksDir: blocksDir)
|
||||
initLock(result.mutex)
|
||||
initCond(result.cond)
|
||||
result.running.store(true)
|
||||
createThread(result.thread, syncWorkerLoop, cast[pointer](result))
|
||||
|
||||
proc triggerSync*(worker: SyncWorker) =
|
||||
signal(worker.cond)
|
||||
|
||||
proc stopSyncWorker*(worker: SyncWorker) =
|
||||
worker.running.store(false)
|
||||
signal(worker.cond)
|
||||
joinThread(worker.thread)
|
||||
deinitCond(worker.cond)
|
||||
deinitLock(worker.mutex)
|
||||
|
||||
proc calculateUsedFromDb(db: LevelDb): uint64 =
|
||||
result = 0
|
||||
for key, value in db.iter():
|
||||
if key.startsWith(BlockInfoPrefix):
|
||||
let infoResult = deserializeBlockInfo(cast[seq[byte]](value))
|
||||
if infoResult.isOk:
|
||||
result += infoResult.value.size.uint64
|
||||
elif key.startsWith(PendingDeletionsPrefix):
|
||||
let pdResult = deserializePendingDeletion(cast[seq[byte]](value))
|
||||
if pdResult.isOk:
|
||||
result += pdResult.value.size
|
||||
|
||||
proc newRepoStore*(blocksDir: string, db: LevelDb, quota: uint64 = 0,
|
||||
ioMode: IOMode = ioDirect,
|
||||
syncBatchSize: int = 0): BResult[RepoStore] =
|
||||
?initShardDirectories(blocksDir)
|
||||
|
||||
var used: uint64 = 0
|
||||
try:
|
||||
let usedBytesOpt = db.get(UsedKey)
|
||||
if usedBytesOpt.isSome and usedBytesOpt.get.len > 0:
|
||||
let usedResult = deserializeUint64(cast[seq[byte]](usedBytesOpt.get))
|
||||
if usedResult.isOk:
|
||||
used = usedResult.value
|
||||
else:
|
||||
used = calculateUsedFromDb(db)
|
||||
else:
|
||||
used = calculateUsedFromDb(db)
|
||||
let usedBytes = ?serializeUint64(used)
|
||||
db.put(UsedKey, cast[string](usedBytes))
|
||||
except LevelDbException as e:
|
||||
return err(databaseError(e.msg))
|
||||
|
||||
var syncWorker: SyncWorker = nil
|
||||
if ioMode == ioBuffered and syncBatchSize > 1:
|
||||
syncWorker = newSyncWorker(blocksDir)
|
||||
|
||||
var store = RepoStore(
|
||||
blocksDir: blocksDir,
|
||||
db: db,
|
||||
quota: quota,
|
||||
ioMode: ioMode,
|
||||
syncBatchSize: syncBatchSize,
|
||||
syncWorker: syncWorker
|
||||
)
|
||||
store.used.store(used)
|
||||
|
||||
ok(store)
|
||||
|
||||
proc close*(store: RepoStore) =
|
||||
if store.syncWorker != nil:
|
||||
stopSyncWorker(store.syncWorker)
|
||||
|
||||
proc acquireCidLock*(store: RepoStore, cidStr: string): Future[CidLock] {.async.} =
|
||||
var cl: CidLock
|
||||
|
||||
if cidStr in store.cidLocks:
|
||||
cl = store.cidLocks[cidStr]
|
||||
cl.waiters += 1
|
||||
else:
|
||||
cl = CidLock(lock: newAsyncLock(), waiters: 1)
|
||||
store.cidLocks[cidStr] = cl
|
||||
|
||||
await cl.lock.acquire()
|
||||
return cl
|
||||
|
||||
proc releaseCidLock*(store: RepoStore, cl: CidLock, cidStr: string) =
|
||||
cl.lock.release()
|
||||
|
||||
cl.waiters -= 1
|
||||
if cl.waiters == 0:
|
||||
store.cidLocks.del(cidStr)
|
||||
|
||||
proc used*(store: RepoStore): uint64 {.inline.} =
|
||||
store.used.load()
|
||||
|
||||
proc decreaseUsed*(store: RepoStore, size: uint64) {.inline.} =
|
||||
discard store.used.fetchSub(size)
|
||||
|
||||
proc quota*(store: RepoStore): uint64 {.inline.} =
|
||||
store.quota
|
||||
|
||||
proc wouldExceedQuota*(store: RepoStore, size: uint64): bool {.inline.} =
|
||||
if store.quota == 0:
|
||||
return false
|
||||
store.used() + size > store.quota
|
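# Example (illustrative): with quota = 1_000_000 and used = 950_000, a 100_000-byte
# block would exceed the quota while a 50_000-byte block still fits; quota == 0
# disables the check entirely.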
||||
|
||||
proc blocksDir*(store: RepoStore): string {.inline.} =
|
||||
store.blocksDir
|
||||
|
||||
proc getBlockPath(store: RepoStore, c: Cid): string {.inline.} =
|
||||
getShardedPath(store.blocksDir, c)
|
||||
|
||||
proc blockInfoKey(cidStr: string): string {.inline.} =
|
||||
BlockInfoPrefix & cidStr
|
||||
|
||||
proc pendingDeletionKey(cidStr: string): string {.inline.} =
|
||||
PendingDeletionsPrefix & cidStr
|
||||
|
||||
proc hasBlock*(store: RepoStore, c: Cid): BResult[bool] {.raises: [].} =
|
||||
let key = blockInfoKey($c)
|
||||
try:
|
||||
let valueOpt = store.db.get(key)
|
||||
ok(valueOpt.isSome)
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
except CatchableError as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc incrementRefCount(store: RepoStore, cidStr: string): BResult[void] =
|
||||
let key = blockInfoKey(cidStr)
|
||||
try:
|
||||
let valueOpt = store.db.get(key)
|
||||
if valueOpt.isSome:
|
||||
let infoResult = deserializeBlockInfo(cast[seq[byte]](valueOpt.get))
|
||||
if infoResult.isOk:
|
||||
var info = infoResult.value
|
||||
info.refCount += 1
|
||||
let infoBytes = ?serializeBlockInfo(info)
|
||||
store.db.put(key, cast[string](infoBytes))
|
||||
ok()
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
except Exception as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc putBlock*(store: RepoStore, b: blk.Block): Future[BResult[bool]] {.async.} =
|
||||
let cidStr = $b.cid
|
||||
let blockPath = store.getBlockPath(b.cid)
|
||||
let blockSize = b.data.len.uint64
|
||||
|
||||
let hasIt = ?store.hasBlock(b.cid)
|
||||
if hasIt:
|
||||
?store.incrementRefCount(cidStr)
|
||||
return ok(false)
|
||||
|
||||
let cl = await store.acquireCidLock(cidStr)
|
||||
defer: store.releaseCidLock(cl, cidStr)
|
||||
|
||||
let hasIt2 = ?store.hasBlock(b.cid)
|
||||
if hasIt2:
|
||||
?store.incrementRefCount(cidStr)
|
||||
return ok(false)
|
||||
|
||||
let fileExisted = fileExists(blockPath)
|
||||
|
||||
var newUsed: uint64
|
||||
if fileExisted:
|
||||
newUsed = store.used.load()
|
||||
else:
|
||||
if store.wouldExceedQuota(blockSize):
|
||||
return err(quotaExceededError())
|
||||
|
||||
case store.ioMode
|
||||
of ioDirect:
|
||||
let writeResult = writeBlockToFile(blockPath, b.data, ioDirect)
|
||||
if writeResult.isErr:
|
||||
return err(writeResult.error)
|
||||
|
||||
of ioBuffered:
|
||||
if store.syncBatchSize == 0:
|
||||
let writeResult = writeBlockToFile(blockPath, b.data, ioBuffered)
|
||||
if writeResult.isErr:
|
||||
return err(writeResult.error)
|
||||
|
||||
elif store.syncBatchSize == 1:
|
||||
let fileResult = writeBlockBuffered(blockPath, b.data)
|
||||
if fileResult.isErr:
|
||||
return err(fileResult.error)
|
||||
let syncResult = syncAndCloseFile(fileResult.value)
|
||||
if syncResult.isErr:
|
||||
return err(syncResult.error)
|
||||
|
||||
else:
|
||||
let writeResult = writeBlockToFile(blockPath, b.data, ioBuffered)
|
||||
if writeResult.isErr:
|
||||
return err(writeResult.error)
|
||||
let count = store.writeCount.fetchAdd(1) + 1
|
||||
if count mod store.syncBatchSize == 0:
|
||||
store.syncWorker.triggerSync()
|
||||
|
||||
newUsed = store.used.fetchAdd(blockSize) + blockSize
|
||||
|
||||
let info = BlockInfo(size: b.data.len, refCount: 1)
|
||||
let
|
||||
infoBytes = ?serializeBlockInfo(info)
|
||||
usedBytes = ?serializeUint64(newUsed)
|
||||
try:
|
||||
store.db.put(blockInfoKey(cidStr), cast[string](infoBytes))
|
||||
store.db.put(UsedKey, cast[string](usedBytes))
|
||||
ok(not fileExisted)
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
except Exception as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc getBlock*(store: RepoStore, c: Cid): Future[BResult[Option[blk.Block]]] {.async.} =
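## Reads a block from disk and re-derives its CID from the data; returns
## none() if the block is not indexed or its file is missing, and an error
## if the recomputed CID does not match `c`.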
|
||||
let blockPath = store.getBlockPath(c)
|
||||
|
||||
let hasIt = ?store.hasBlock(c)
|
||||
if not hasIt:
|
||||
return ok(none(blk.Block))
|
||||
|
||||
if not fileExists(blockPath):
|
||||
return ok(none(blk.Block))
|
||||
|
||||
var data: seq[byte]
|
||||
try:
|
||||
data = cast[seq[byte]](readFile(blockPath))
|
||||
except IOError as e:
|
||||
return err(ioError(e.msg))
|
||||
|
||||
let b = ?blk.newBlock(data)
|
||||
if b.cid != c:
|
||||
return err(cidError("Block CID mismatch"))
|
||||
|
||||
ok(some(b))
|
||||
|
||||
proc getBlockUnchecked*(store: RepoStore, c: Cid): Future[BResult[Option[blk.Block]]] {.async.} =
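## Like getBlock, but skips re-hashing: the returned block is built from the
## stored bytes with the caller-supplied CID (fromCidUnchecked), trading
## integrity verification for read speed.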
|
||||
let blockPath = store.getBlockPath(c)
|
||||
|
||||
let hasIt = ?store.hasBlock(c)
|
||||
if not hasIt:
|
||||
return ok(none(blk.Block))
|
||||
|
||||
if not fileExists(blockPath):
|
||||
return ok(none(blk.Block))
|
||||
|
||||
var data: seq[byte]
|
||||
try:
|
||||
data = cast[seq[byte]](readFile(blockPath))
|
||||
except IOError as e:
|
||||
return err(ioError(e.msg))
|
||||
|
||||
ok(some(blk.fromCidUnchecked(c, data)))
|
||||
|
||||
proc releaseBlock*(store: RepoStore, c: Cid): BResult[bool] =
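## Decrements the block's refCount. When it drops to zero the BlockInfo entry
## is replaced by a PendingDeletion record so the background worker can remove
## the file later; returns true if the block was queued for deletion.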
|
||||
let cidStr = $c
|
||||
let blockPath = store.getBlockPath(c)
|
||||
let key = blockInfoKey(cidStr)
|
||||
|
||||
try:
|
||||
let valueOpt = store.db.get(key)
|
||||
if valueOpt.isNone:
|
||||
return ok(false)
|
||||
|
||||
var info = ?deserializeBlockInfo(cast[seq[byte]](valueOpt.get))
|
||||
if info.refCount == 0:
|
||||
return err(databaseError("Block ref_count already 0"))
|
||||
|
||||
info.refCount -= 1
|
||||
|
||||
if info.refCount == 0:
|
||||
let blockSize = info.size.uint64
|
||||
let pd = PendingDeletion(
|
||||
queuedAt: epochTime().uint64,
|
||||
blockPath: blockPath,
|
||||
size: blockSize
|
||||
)
|
||||
let pdBytes = ?serializePendingDeletion(pd)
|
||||
store.db.delete(key)
|
||||
store.db.put(pendingDeletionKey(cidStr), cast[string](pdBytes))
|
||||
return ok(true)
|
||||
else:
|
||||
let infoBytes = ?serializeBlockInfo(info)
|
||||
store.db.put(key, cast[string](infoBytes))
|
||||
return ok(false)
|
||||
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc getPendingDeletions*(store: RepoStore, limit: int): BResult[seq[(string, PendingDeletion)]] =
|
||||
var entries: seq[(string, PendingDeletion)] = @[]
|
||||
try:
|
||||
for key, value in store.db.iter():
|
||||
if not key.startsWith(PendingDeletionsPrefix):
|
||||
continue
|
||||
let cidStr = key[PendingDeletionsPrefix.len .. ^1]
|
||||
let pdResult = deserializePendingDeletion(cast[seq[byte]](value))
|
||||
if pdResult.isOk:
|
||||
entries.add((cidStr, pdResult.value))
|
||||
if entries.len >= limit:
|
||||
break
|
||||
ok(entries)
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc pendingDeletionsCount*(store: RepoStore): BResult[int] =
|
||||
var count = 0
|
||||
try:
|
||||
for key, _ in store.db.iter():
|
||||
if key.startsWith(PendingDeletionsPrefix):
|
||||
inc count
|
||||
ok(count)
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc deletePendingBlock*(store: RepoStore, c: Cid, blockPath: string, size: uint64): Future[BResult[bool]] {.async.} =
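## Deletes a previously queued block file unless the block has been re-added
## in the meantime (in which case false is returned); removing the file also
## decrements the in-memory used counter.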
|
||||
let hasIt = ?store.hasBlock(c)
|
||||
if hasIt:
|
||||
return ok(false)
|
||||
|
||||
if fileExists(blockPath):
|
||||
try:
|
||||
removeFile(blockPath)
|
||||
discard store.used.fetchSub(size)
|
||||
except OSError as e:
|
||||
return err(ioError(e.msg))
|
||||
|
||||
ok(true)
|
||||
|
||||
proc removePendingDeletion*(store: RepoStore, cidStr: string): BResult[void] =
|
||||
try:
|
||||
store.db.delete(pendingDeletionKey(cidStr))
|
||||
ok()
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc removePendingDeletionsBatch*(store: RepoStore, cidStrs: seq[string]): BResult[void] =
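## Removes a batch of pending-deletion keys and persists the current in-memory
## used counter in the same LevelDB write batch, so the on-disk counter stays
## consistent with the completed deletions.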
|
||||
if cidStrs.len == 0:
|
||||
return ok()
|
||||
let currentUsed = store.used.load()
|
||||
let usedBytes = ?serializeUint64(currentUsed)
|
||||
try:
|
||||
let batch = newBatch()
|
||||
for cidStr in cidStrs:
|
||||
batch.delete(pendingDeletionKey(cidStr))
|
||||
batch.put(UsedKey, cast[string](usedBytes))
|
||||
store.db.write(batch)
|
||||
ok()
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
|
||||
proc totalSize*(store: RepoStore): BResult[uint64] =
|
||||
var total: uint64 = 0
|
||||
try:
|
||||
for key, value in store.db.iter():
|
||||
if key.startsWith(BlockInfoPrefix):
|
||||
let infoResult = deserializeBlockInfo(cast[seq[byte]](value))
|
||||
if infoResult.isOk:
|
||||
total += infoResult.value.size.uint64
|
||||
ok(total)
|
||||
except LevelDbException as e:
|
||||
err(databaseError(e.msg))
|
||||
blockstore/serialization.nim (new file, 171 lines)
@@ -0,0 +1,171 @@
|
||||
import std/[streams, endians]
|
||||
import results
|
||||
|
||||
import ./errors
|
||||
|
||||
proc writeUint8*(s: Stream, v: uint8) =
|
||||
s.write(v)
|
||||
|
||||
proc readUint8*(s: Stream): uint8 =
|
||||
s.read(result)
|
||||
|
||||
proc writeUint16*(s: Stream, v: uint16) =
|
||||
var le: uint16
|
||||
littleEndian16(addr le, unsafeAddr v)
|
||||
s.write(le)
|
||||
|
||||
proc readUint16*(s: Stream): uint16 =
|
||||
var le: uint16
|
||||
s.read(le)
|
||||
littleEndian16(addr result, addr le)
|
||||
|
||||
proc writeUint32*(s: Stream, v: uint32) =
|
||||
var le: uint32
|
||||
littleEndian32(addr le, unsafeAddr v)
|
||||
s.write(le)
|
||||
|
||||
proc readUint32*(s: Stream): uint32 =
|
||||
var le: uint32
|
||||
s.read(le)
|
||||
littleEndian32(addr result, addr le)
|
||||
|
||||
proc writeUint64*(s: Stream, v: uint64) =
|
||||
var le: uint64
|
||||
littleEndian64(addr le, unsafeAddr v)
|
||||
s.write(le)
|
||||
|
||||
proc readUint64*(s: Stream): uint64 =
|
||||
var le: uint64
|
||||
s.read(le)
|
||||
littleEndian64(addr result, addr le)
|
||||
|
||||
proc writeInt64*(s: Stream, v: int64) =
|
||||
writeUint64(s, cast[uint64](v))
|
||||
|
||||
proc readInt64*(s: Stream): int64 =
|
||||
cast[int64](readUint64(s))
|
||||
|
||||
proc writeBytes*(s: Stream, data: openArray[byte]) =
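## Length-prefixed encoding: a uint64 little-endian byte count followed by the raw bytes.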
|
||||
s.writeUint64(data.len.uint64)
|
||||
if data.len > 0:
|
||||
s.writeData(unsafeAddr data[0], data.len)
|
||||
|
||||
proc readBytes*(s: Stream): BResult[seq[byte]] =
|
||||
let len = s.readUint64().int
|
||||
if len > 0:
|
||||
var data = newSeq[byte](len)
|
||||
let bytesRead = s.readData(addr data[0], len)
|
||||
if bytesRead != len:
|
||||
return err(ioError("Failed to read " & $len & " bytes, got " & $bytesRead))
|
||||
ok(data)
|
||||
else:
|
||||
ok(newSeq[byte]())
|
||||
|
||||
proc writeString*(s: Stream, str: string) =
|
||||
writeBytes(s, cast[seq[byte]](str))
|
||||
|
||||
proc readString*(s: Stream): BResult[string] =
|
||||
let bytes = ?readBytes(s)
|
||||
var res = newString(bytes.len)
|
||||
if bytes.len > 0:
|
||||
copyMem(addr res[0], unsafeAddr bytes[0], bytes.len)
|
||||
ok(res)
|
||||
|
||||
proc writeBool*(s: Stream, v: bool) =
|
||||
s.writeUint8(if v: 1 else: 0)
|
||||
|
||||
proc readBool*(s: Stream): bool =
|
||||
s.readUint8() != 0
|
||||
|
||||
proc toBytes*[T](obj: T, writer: proc(s: Stream, v: T) {.gcsafe.}): BResult[seq[byte]] {.gcsafe.} =
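## Serializes `obj` by running `writer` over an in-memory StringStream and
## returning the buffered bytes; any raised exception is mapped to a
## serialization error.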
|
||||
try:
|
||||
let s = newStringStream()
|
||||
{.cast(gcsafe).}:
|
||||
writer(s, obj)
|
||||
s.setPosition(0)
|
||||
ok(cast[seq[byte]](s.readAll()))
|
||||
except Exception as e:
|
||||
err(serializationError("serialization failed: " & e.msg))
|
||||
|
||||
proc fromBytes*[T](data: openArray[byte], reader: proc(s: Stream): T {.gcsafe.}): BResult[T] {.gcsafe.} =
|
||||
try:
|
||||
var str = newString(data.len)
|
||||
if data.len > 0:
|
||||
copyMem(addr str[0], unsafeAddr data[0], data.len)
|
||||
let s = newStringStream(str)
|
||||
{.cast(gcsafe).}:
|
||||
ok(reader(s))
|
||||
except Exception as e:
|
||||
err(deserializationError("deserialization failed: " & e.msg))
|
||||
|
||||
proc fromBytesResult*[T](data: openArray[byte], reader: proc(s: Stream): BResult[T] {.gcsafe.}): BResult[T] {.gcsafe.} =
|
||||
try:
|
||||
var str = newString(data.len)
|
||||
if data.len > 0:
|
||||
copyMem(addr str[0], unsafeAddr data[0], data.len)
|
||||
let s = newStringStream(str)
|
||||
{.cast(gcsafe).}:
|
||||
reader(s)
|
||||
except Exception as e:
|
||||
err(deserializationError("deserialization failed: " & e.msg))
|
||||
|
||||
type
|
||||
BlockInfo* = object
|
||||
size*: int
|
||||
refCount*: uint32
|
||||
|
||||
proc writeBlockInfo*(s: Stream, info: BlockInfo) {.gcsafe.} =
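## Wire format: size as uint64 LE followed by refCount as uint32 LE (12 bytes).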
|
||||
s.writeUint64(info.size.uint64)
|
||||
s.writeUint32(info.refCount)
|
||||
|
||||
proc readBlockInfo*(s: Stream): BlockInfo {.gcsafe.} =
|
||||
result.size = s.readUint64().int
|
||||
result.refCount = s.readUint32()
|
||||
|
||||
proc serializeBlockInfo*(info: BlockInfo): BResult[seq[byte]] =
|
||||
toBytes(info, writeBlockInfo)
|
||||
|
||||
proc deserializeBlockInfo*(data: openArray[byte]): BResult[BlockInfo] =
|
||||
fromBytes(data, readBlockInfo)
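# Round-trip sketch (hypothetical values, using the procs above):
#   let info = BlockInfo(size: 4096, refCount: 1)
#   let bytes = serializeBlockInfo(info).get()
#   doAssert deserializeBlockInfo(bytes).get() == info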
|
||||
|
||||
type
|
||||
PendingDeletion* = object
|
||||
queuedAt*: uint64
|
||||
blockPath*: string
|
||||
size*: uint64
|
||||
|
||||
proc writePendingDeletion*(s: Stream, pd: PendingDeletion) {.gcsafe.} =
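## Wire format: queuedAt (uint64 LE), blockPath (length-prefixed string), size (uint64 LE).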
|
||||
s.writeUint64(pd.queuedAt)
|
||||
s.writeString(pd.blockPath)
|
||||
s.writeUint64(pd.size)
|
||||
|
||||
proc readPendingDeletion*(s: Stream): BResult[PendingDeletion] {.gcsafe.} =
|
||||
var pd: PendingDeletion
|
||||
pd.queuedAt = s.readUint64()
|
||||
pd.blockPath = ?s.readString()
|
||||
pd.size = s.readUint64()
|
||||
ok(pd)
|
||||
|
||||
proc serializePendingDeletion*(pd: PendingDeletion): BResult[seq[byte]] =
|
||||
toBytes(pd, writePendingDeletion)
|
||||
|
||||
proc deserializePendingDeletion*(data: openArray[byte]): BResult[PendingDeletion] =
|
||||
fromBytesResult(data, readPendingDeletion)
|
||||
|
||||
proc serializeUint64*(v: uint64): BResult[seq[byte]] =
|
||||
try:
|
||||
let s = newStringStream()
|
||||
s.writeUint64(v)
|
||||
s.setPosition(0)
|
||||
ok(cast[seq[byte]](s.readAll()))
|
||||
except Exception as e:
|
||||
err(serializationError("serialization failed: " & e.msg))
|
||||
|
||||
proc deserializeUint64*(data: openArray[byte]): BResult[uint64] =
|
||||
if data.len < 8:
|
||||
return err(deserializationError("Data too short for uint64"))
|
||||
var str = newString(data.len)
|
||||
if data.len > 0:
|
||||
copyMem(addr str[0], unsafeAddr data[0], data.len)
|
||||
let s = newStringStream(str)
|
||||
ok(s.readUint64())
|
||||
blockstore/sha256.nim (new file, 31 lines)
@@ -0,0 +1,31 @@
|
||||
## Compile with:
|
||||
## -d:useConstantine to use Constantine's SHA256 implementation
|
||||
## -d:useBlake3 to use BLAKE3 (hashlib) for benchmarking
|
||||
## Default uses nimcrypto SHA256
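##
## Minimal usage sketch (hypothetical call site; the import path assumes this
## repository layout with --path:"." from nim.cfg):
##   import blockstore/sha256
##   let digest = sha256Hash(cast[seq[byte]]("hello"))   # array[32, byte]
##   doAssert digest.len == 32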
|
||||
when defined(useBlake3):
|
||||
import hashlib/misc/blake3
|
||||
|
||||
proc sha256Hash*(data: openArray[byte]): array[32, byte] =
|
||||
## Compute BLAKE3 hash (32 bytes, same size as SHA256)
|
||||
var ctx: Context[BLAKE3]
|
||||
ctx.init()
|
||||
ctx.update(data)
|
||||
ctx.final(result)
|
||||
|
||||
elif defined(useConstantine):
|
||||
import constantine/hashes
|
||||
|
||||
proc sha256Hash*(data: openArray[byte]): array[32, byte] =
|
||||
## Compute SHA2-256 hash using Constantine
|
||||
result = hashes.sha256.hash(data)
|
||||
|
||||
else:
|
||||
import nimcrypto/sha2
|
||||
|
||||
proc sha256Hash*(data: openArray[byte]): array[32, byte] =
|
||||
## Compute SHA2-256 hash using nimcrypto
|
||||
var ctx: sha256
|
||||
ctx.init()
|
||||
ctx.update(data)
|
||||
result = ctx.finish().data
|
||||
ctx.clear()
|
||||
blockstore/sharding.nim (new file, 52 lines)
@@ -0,0 +1,52 @@
|
||||
import std/os
|
||||
import results
|
||||
|
||||
import ./errors
|
||||
import ./cid
|
||||
|
||||
const
|
||||
Base32Chars* = "abcdefghijklmnopqrstuvwxyz234567"
|
||||
TmpDirName* = "tmp"
|
||||
|
||||
proc initShardDirectories*(baseDir: string): BResult[void] =
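## One-time setup of the on-disk layout: a tmp/ directory plus a two-level
## base-32 shard tree (32 x 32 = 1024 leaf directories). The
## .shards_initialized marker file makes later calls a no-op.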
|
||||
let marker = baseDir / ".shards_initialized"
|
||||
|
||||
if fileExists(marker):
|
||||
return ok()
|
||||
|
||||
try:
|
||||
createDir(baseDir)
|
||||
discard existsOrCreateDir(baseDir / TmpDirName)
|
||||
|
||||
for c1 in Base32Chars:
|
||||
let level1 = baseDir / $c1
|
||||
discard existsOrCreateDir(level1)
|
||||
for c2 in Base32Chars:
|
||||
let level2 = level1 / $c2
|
||||
discard existsOrCreateDir(level2)
|
||||
|
||||
writeFile(marker, "")
|
||||
ok()
|
||||
except OSError as e:
|
||||
err(ioError(e.msg))
|
||||
|
||||
proc cleanupTmpDir*(baseDir: string) =
|
||||
let tmpDir = baseDir / TmpDirName
|
||||
if dirExists(tmpDir):
|
||||
try:
|
||||
removeDir(tmpDir)
|
||||
createDir(tmpDir)
|
||||
except OSError:
|
||||
discard
|
||||
|
||||
proc getTmpPath*(baseDir: string, name: string, ext: string = ""): string =
|
||||
baseDir / TmpDirName / (name & ext)
|
||||
|
||||
proc getShardedPathStr*(baseDir: string, cidStr: string, ext: string = ""): string =
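## Shards by the last two characters of the CID string: a CID ending in "wq"
## maps to baseDir/w/q/<cid><ext>.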
|
||||
let len = cidStr.len
|
||||
let d1 = cidStr[len - 2 .. len - 2]
|
||||
let d2 = cidStr[len - 1 .. len - 1]
|
||||
baseDir / d1 / d2 / (cidStr & ext)
|
||||
|
||||
proc getShardedPath*(baseDir: string, c: Cid, ext: string = ""): string =
|
||||
getShardedPathStr(baseDir, $c, ext)
|
||||
nim.cfg (new file, 4 lines)
@@ -0,0 +1,4 @@
|
||||
--path:"."
|
||||
# Register blockstore-specific multicodecs with libp2p
|
||||
-d:libp2p_multicodec_exts="blockstore/multicodec_exts.nim"
|
||||
-d:libp2p_contentids_exts="blockstore/contentids_exts.nim"
|
||||
tests/bench_dataset.nim (new file, 498 lines)
@@ -0,0 +1,498 @@
|
||||
import std/[os, times, strformat, random, options, strutils]
|
||||
import chronos
|
||||
import taskpools
|
||||
import results
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/blocks
|
||||
import ../blockstore/chunker
|
||||
import ../blockstore/dataset
|
||||
import ../blockstore/cid
|
||||
import ../blockstore/merkle
|
||||
import ../blockstore/ioutils
|
||||
import ../blockstore/blockmap
|
||||
|
||||
when defined(posix):
|
||||
import std/posix
|
||||
elif defined(windows):
|
||||
import std/winlean
|
||||
|
||||
const
|
||||
DefaultSize = 4'u64 * 1024 * 1024 * 1024
|
||||
DefaultChunkSize = 64 * 1024
|
||||
DefaultPoolSize = 4
|
||||
TestDir = "nim_blockstore_bench"
|
||||
TestFile = TestDir / "testfile.bin"
|
||||
DbPath = TestDir / "bench_db"
|
||||
BlocksDir = TestDir / "blocks"
|
||||
|
||||
type
|
||||
BenchConfig = object
|
||||
totalSize: uint64
|
||||
chunkSize: int
|
||||
merkleBackend: MerkleBackend
|
||||
blockBackend: BlockBackend
|
||||
blockmapBackend: BlockmapBackend
|
||||
ioMode: IOMode
|
||||
syncBatchSize: int
|
||||
synthetic: bool
|
||||
reportInterval: float
|
||||
poolSize: int
|
||||
blockHashConfig: BlockHashConfig
|
||||
|
||||
proc formatSize(bytes: uint64): string =
|
||||
if bytes >= 1024'u64 * 1024 * 1024 * 1024:
|
||||
&"{bytes.float / (1024 * 1024 * 1024 * 1024):.2f} TB"
|
||||
elif bytes >= 1024'u64 * 1024 * 1024:
|
||||
&"{bytes.float / (1024 * 1024 * 1024):.2f} GB"
|
||||
elif bytes >= 1024'u64 * 1024:
|
||||
&"{bytes.float / (1024 * 1024):.2f} MB"
|
||||
else:
|
||||
&"{bytes} bytes"
|
||||
|
||||
proc formatRate(bytesPerSec: float): string =
|
||||
if bytesPerSec >= 1024 * 1024 * 1024:
|
||||
&"{bytesPerSec / (1024 * 1024 * 1024):.2f} GB/s"
|
||||
elif bytesPerSec >= 1024 * 1024:
|
||||
&"{bytesPerSec / (1024 * 1024):.2f} MB/s"
|
||||
else:
|
||||
&"{bytesPerSec / 1024:.2f} KB/s"
|
||||
|
||||
proc parseSize(s: string): uint64 =
|
||||
var num = s
|
||||
var multiplier: uint64 = 1
|
||||
|
||||
if s.endsWith("TB") or s.endsWith("tb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024 * 1024 * 1024
|
||||
elif s.endsWith("GB") or s.endsWith("gb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024 * 1024
|
||||
elif s.endsWith("MB") or s.endsWith("mb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024
|
||||
elif s.endsWith("KB") or s.endsWith("kb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64
|
||||
|
||||
try:
|
||||
result = uint64(parseInt(num)) * multiplier
|
||||
except ValueError:
|
||||
result = DefaultSize
|
||||
|
||||
proc syncFile(f: File) =
|
||||
flushFile(f)
|
||||
when defined(posix):
|
||||
discard fsync(f.getFileHandle().cint)
|
||||
elif defined(windows):
|
||||
discard flushFileBuffers(f.getFileHandle())
|
||||
|
||||
proc createTestFile(path: string, size: uint64) =
|
||||
echo &"Creating {formatSize(size)} test file..."
|
||||
let startTime = epochTime()
|
||||
|
||||
randomize()
|
||||
var f = open(path, fmWrite)
|
||||
|
||||
const bufSize = 1024 * 1024
|
||||
var buf = newSeq[byte](bufSize)
|
||||
var remaining = size
|
||||
|
||||
while remaining > 0:
|
||||
for i in 0 ..< bufSize:
|
||||
buf[i] = byte(rand(255))
|
||||
|
||||
let writeSize = min(remaining, bufSize.uint64)
|
||||
discard f.writeBytes(buf, 0, writeSize.int)
|
||||
remaining -= writeSize
|
||||
|
||||
syncFile(f)
|
||||
f.close()
|
||||
|
||||
let elapsed = epochTime() - startTime
|
||||
let rate = size.float / elapsed
|
||||
echo &" Created in {elapsed:.2f}s ({formatRate(rate)})"
|
||||
|
||||
proc cleanup() =
|
||||
if dirExists(TestDir):
|
||||
removeDir(TestDir)
|
||||
|
||||
proc runBenchmark(config: BenchConfig) {.async.} =
|
||||
echo "=== Dataset Ingestion Benchmark ==="
|
||||
echo &"Size: {formatSize(config.totalSize)}"
|
||||
echo &"Chunk size: {config.chunkSize div 1024} KB"
|
||||
echo &"Expected blocks: {config.totalSize div config.chunkSize.uint64}"
|
||||
echo &"Merkle backend: {config.merkleBackend}"
|
||||
echo &"Block backend: {config.blockBackend}"
|
||||
echo &"Blockmap backend: {config.blockmapBackend}"
|
||||
echo &"IO mode: {config.ioMode}"
|
||||
echo &"Sync batch size: {config.syncBatchSize}"
|
||||
echo &"Thread pool size: {config.poolSize}"
|
||||
echo &"Data mode: {(if config.synthetic: \"synthetic\" else: \"file-based (async)\")}"
|
||||
echo ""
|
||||
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
if not config.synthetic:
|
||||
createTestFile(TestFile, config.totalSize)
|
||||
echo ""
|
||||
|
||||
echo "Initializing dataset store..."
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir,
|
||||
blockHashConfig = config.blockHashConfig,
|
||||
merkleBackend = config.merkleBackend,
|
||||
blockBackend = config.blockBackend,
|
||||
blockmapBackend = config.blockmapBackend,
|
||||
ioMode = config.ioMode,
|
||||
syncBatchSize = config.syncBatchSize)
|
||||
if storeResult.isErr:
|
||||
echo &"Failed to create store: {storeResult.error}"
|
||||
return
|
||||
|
||||
let store = storeResult.value
|
||||
|
||||
let filename = if config.synthetic: some("benchmark") else: some(TestFile)
|
||||
let builderResult = store.startDataset(config.chunkSize.uint32, filename)
|
||||
if builderResult.isErr:
|
||||
echo &"Failed to start dataset: {builderResult.error}"
|
||||
return
|
||||
|
||||
var builder = builderResult.value
|
||||
|
||||
echo "Ingesting blocks..."
|
||||
let ingestStart = epochTime()
|
||||
var blockCount: uint64 = 0
|
||||
var totalBytes: uint64 = 0
|
||||
var lastReport = ingestStart
|
||||
var lastBytes: uint64 = 0
|
||||
let totalBlocks = config.totalSize div config.chunkSize.uint64
|
||||
|
||||
if config.synthetic:
|
||||
var chunk = newSeq[byte](config.chunkSize)
|
||||
randomize()
|
||||
for i in 0 ..< config.chunkSize:
|
||||
chunk[i] = byte(rand(255))
|
||||
|
||||
while totalBytes < config.totalSize:
|
||||
chunk[0] = byte(blockCount and 0xFF)
|
||||
chunk[1] = byte((blockCount shr 8) and 0xFF)
|
||||
chunk[2] = byte((blockCount shr 16) and 0xFF)
|
||||
chunk[3] = byte((blockCount shr 24) and 0xFF)
|
||||
|
||||
let blkResult = newBlock(chunk, config.blockHashConfig)
|
||||
if blkResult.isErr:
|
||||
echo &"Failed to create block: {blkResult.error}"
|
||||
break
|
||||
|
||||
let blk = blkResult.value
|
||||
totalBytes += blk.data.len.uint64
|
||||
|
||||
let addResult = await builder.addBlock(blk)
|
||||
if addResult.isErr:
|
||||
echo &"Failed to add block: {addResult.error}"
|
||||
break
|
||||
|
||||
blockCount += 1
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastReport >= config.reportInterval:
|
||||
let intervalBytes = totalBytes - lastBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastReport)
|
||||
let overallRate = totalBytes.float / (now - ingestStart)
|
||||
let progress = (blockCount.float / totalBlocks.float) * 100
|
||||
let eta = if overallRate > 0: (config.totalSize - totalBytes).float / overallRate else: 0.0
|
||||
echo &" Progress: {progress:.1f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s"
|
||||
lastReport = now
|
||||
lastBytes = totalBytes
|
||||
else:
|
||||
var pool = Taskpool.new(numThreads = config.poolSize)
|
||||
defer: pool.shutdown()
|
||||
|
||||
let streamResult = await builder.chunkFile(pool)
|
||||
if streamResult.isErr:
|
||||
echo &"Failed to open file: {streamResult.error}"
|
||||
return
|
||||
|
||||
var stream = streamResult.value
|
||||
|
||||
while true:
|
||||
let blockOpt = await stream.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
|
||||
let blockResult = blockOpt.get()
|
||||
if blockResult.isErr:
|
||||
echo &"Block read error: {blockResult.error}"
|
||||
break
|
||||
|
||||
let blk = blockResult.value
|
||||
totalBytes += blk.data.len.uint64
|
||||
|
||||
let addResult = await builder.addBlock(blk)
|
||||
if addResult.isErr:
|
||||
echo &"Failed to add block: {addResult.error}"
|
||||
break
|
||||
|
||||
blockCount += 1
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastReport >= config.reportInterval:
|
||||
let intervalBytes = totalBytes - lastBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastReport)
|
||||
let overallRate = totalBytes.float / (now - ingestStart)
|
||||
let progress = (blockCount.float / totalBlocks.float) * 100
|
||||
let eta = if overallRate > 0: (config.totalSize - totalBytes).float / overallRate else: 0.0
|
||||
echo &" Progress: {progress:.1f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s"
|
||||
lastReport = now
|
||||
lastBytes = totalBytes
|
||||
|
||||
stream.close()
|
||||
|
||||
let ingestEnd = epochTime()
|
||||
let ingestTime = ingestEnd - ingestStart
|
||||
let ingestRate = totalBytes.float / ingestTime
|
||||
|
||||
echo ""
|
||||
echo "Ingestion complete:"
|
||||
echo &" Blocks: {blockCount}"
|
||||
echo &" Bytes: {formatSize(totalBytes)}"
|
||||
echo &" Time: {ingestTime:.2f}s"
|
||||
echo &" Rate: {formatRate(ingestRate)}"
|
||||
echo ""
|
||||
|
||||
echo "Finalizing dataset (building merkle tree)..."
|
||||
let finalizeStart = epochTime()
|
||||
|
||||
let datasetResult = await builder.finalize()
|
||||
if datasetResult.isErr:
|
||||
echo &"Failed to finalize: {datasetResult.error}"
|
||||
return
|
||||
|
||||
let dataset = datasetResult.value
|
||||
let finalizeEnd = epochTime()
|
||||
let finalizeTime = finalizeEnd - finalizeStart
|
||||
|
||||
echo &" Finalize time: {finalizeTime:.2f}s"
|
||||
echo ""
|
||||
|
||||
let totalTime = ingestTime + finalizeTime
|
||||
let overallRate = totalBytes.float / totalTime
|
||||
|
||||
echo "=== Write Summary ==="
|
||||
echo &" Dataset manifest CID: {dataset.manifestCid}"
|
||||
echo &" Dataset tree CID: {dataset.treeCid}"
|
||||
echo &" Total blocks: {dataset.blockCount}"
|
||||
echo &" Total time: {totalTime:.2f}s"
|
||||
echo &" Overall rate: {formatRate(overallRate)}"
|
||||
echo &" Storage used: {formatSize(store.used())}"
|
||||
echo ""
|
||||
|
||||
echo "=== Read Benchmark (without verification) ==="
|
||||
echo "Reading all blocks..."
|
||||
|
||||
let readStart = epochTime()
|
||||
var readBytes: uint64 = 0
|
||||
var readBlocks = 0
|
||||
var lastReadReport = readStart
|
||||
var lastReadBytes: uint64 = 0
|
||||
|
||||
for i in 0 ..< dataset.blockCount:
|
||||
let blockResult = await dataset.getBlock(i)
|
||||
if blockResult.isErr:
|
||||
echo &"Failed to read block {i}: {blockResult.error}"
|
||||
break
|
||||
|
||||
let blockOpt = blockResult.value
|
||||
if blockOpt.isNone:
|
||||
echo &"Block {i} not found"
|
||||
break
|
||||
|
||||
let (blk, _) = blockOpt.get()
|
||||
readBytes += blk.data.len.uint64
|
||||
readBlocks += 1
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastReadReport >= config.reportInterval:
|
||||
let intervalBytes = readBytes - lastReadBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastReadReport)
|
||||
let overallReadRate = readBytes.float / (now - readStart)
|
||||
let progress = (readBytes.float / totalBytes.float) * 100
|
||||
echo &" Progress: {progress:.1f}% | Blocks: {readBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallReadRate)})"
|
||||
lastReadReport = now
|
||||
lastReadBytes = readBytes
|
||||
|
||||
let readEnd = epochTime()
|
||||
let readTime = readEnd - readStart
|
||||
let readRate = readBytes.float / readTime
|
||||
|
||||
echo ""
|
||||
echo "Read complete (no verification):"
|
||||
echo &" Blocks read: {readBlocks}"
|
||||
echo &" Bytes read: {formatSize(readBytes)}"
|
||||
echo &" Time: {readTime:.2f}s"
|
||||
echo &" Rate: {formatRate(readRate)}"
|
||||
echo ""
|
||||
|
||||
echo "=== Read Benchmark (with verification) ==="
|
||||
echo "Reading and verifying all blocks..."
|
||||
|
||||
let mhashResult = dataset.treeCid.mhash()
|
||||
if mhashResult.isErr:
|
||||
echo &"Failed to get multihash from treeCid: {mhashResult.error}"
|
||||
return
|
||||
|
||||
let mhash = mhashResult.value
|
||||
var rootHash: MerkleHash
|
||||
let digestBytes = mhash.data.buffer
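# The digest is preceded by a 2-byte multihash header (hash code and length,
# assuming both fit in single-byte varints); copy only the raw hash that follows.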
|
||||
if digestBytes.len >= HashSize + 2:
|
||||
copyMem(addr rootHash[0], unsafeAddr digestBytes[2], HashSize)
|
||||
else:
|
||||
echo "Invalid multihash length"
|
||||
return
|
||||
|
||||
let verifyStart = epochTime()
|
||||
var verifiedBlocks = 0
|
||||
var verifiedBytes: uint64 = 0
|
||||
var verifyFailed = 0
|
||||
var lastVerifyReport = verifyStart
|
||||
var lastVerifyBytes: uint64 = 0
|
||||
|
||||
for i in 0 ..< dataset.blockCount:
|
||||
let blockResult = await dataset.getBlock(i)
|
||||
if blockResult.isErr:
|
||||
echo &"Failed to read block {i}: {blockResult.error}"
|
||||
break
|
||||
|
||||
let blockOpt = blockResult.value
|
||||
if blockOpt.isNone:
|
||||
echo &"Block {i} not found"
|
||||
break
|
||||
|
||||
let (blk, proof) = blockOpt.get()
|
||||
|
||||
let leafHash = config.blockHashConfig.hashFunc(blk.data)
|
||||
if not verify(proof, rootHash, leafHash):
|
||||
verifyFailed += 1
|
||||
if verifyFailed <= 5:
|
||||
echo &" WARNING: Block {i} verification failed!"
|
||||
|
||||
verifiedBlocks += 1
|
||||
verifiedBytes += blk.data.len.uint64
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastVerifyReport >= config.reportInterval:
|
||||
let intervalBytes = verifiedBytes - lastVerifyBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastVerifyReport)
|
||||
let overallVerifyRate = verifiedBytes.float / (now - verifyStart)
|
||||
let progress = (verifiedBytes.float / totalBytes.float) * 100
|
||||
echo &" Progress: {progress:.1f}% | Verified: {verifiedBlocks} | Failed: {verifyFailed} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallVerifyRate)})"
|
||||
lastVerifyReport = now
|
||||
lastVerifyBytes = verifiedBytes
|
||||
|
||||
let verifyEnd = epochTime()
|
||||
let verifyTime = verifyEnd - verifyStart
|
||||
let verifyRate = verifiedBytes.float / verifyTime
|
||||
|
||||
echo ""
|
||||
echo "Read with verification complete:"
|
||||
echo &" Blocks verified: {verifiedBlocks}"
|
||||
echo &" Verification failures: {verifyFailed}"
|
||||
echo &" Bytes verified: {formatSize(verifiedBytes)}"
|
||||
echo &" Time: {verifyTime:.2f}s"
|
||||
echo &" Rate: {formatRate(verifyRate)}"
|
||||
echo ""
|
||||
|
||||
echo "Closing store..."
|
||||
await store.closeAsync()
|
||||
|
||||
echo "Cleaning up..."
|
||||
cleanup()
|
||||
echo "Done!"
|
||||
|
||||
proc printUsage() =
|
||||
echo "Usage: bench_dataset [options]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --size=<size> Dataset size (e.g., 1GB, 4GB, 100GB, 1TB)"
|
||||
echo " --chunk=<size> Chunk size in KB (default: 64)"
|
||||
echo " --merkle=<type> Merkle backend: embedded, leveldb, packed (default: packed)"
|
||||
echo " --blocks=<type> Block backend: sharded, packed (default: sharded)"
|
||||
echo " --blockmap=<type> Blockmap backend: leveldb, file (default: leveldb)"
|
||||
echo " --io=<mode> I/O mode: direct, buffered (default: direct)"
|
||||
echo " --sync=<value> Sync batch: none, every, or N (default: none)"
|
||||
echo " --pool=<size> Thread pool size for async I/O (default: 4, min: 2)"
|
||||
echo " --synthetic Use synthetic in-memory data (no file I/O)"
|
||||
echo " --help Show this help"
|
||||
|
||||
proc main() =
|
||||
var config = BenchConfig(
|
||||
totalSize: DefaultSize,
|
||||
chunkSize: DefaultChunkSize,
|
||||
merkleBackend: mbPacked,
|
||||
blockBackend: bbSharded,
|
||||
blockmapBackend: bmLevelDb,
|
||||
ioMode: ioDirect,
|
||||
syncBatchSize: 0,
|
||||
synthetic: false,
|
||||
reportInterval: 1.0,
|
||||
poolSize: DefaultPoolSize,
|
||||
blockHashConfig: defaultBlockHashConfig()
|
||||
)
|
||||
|
||||
for arg in commandLineParams():
|
||||
if arg.startsWith("--size="):
|
||||
config.totalSize = parseSize(arg[7..^1])
|
||||
elif arg.startsWith("--chunk="):
|
||||
config.chunkSize = parseInt(arg[8..^1]) * 1024
|
||||
elif arg.startsWith("--merkle="):
|
||||
let backend = arg[9..^1]
|
||||
case backend
|
||||
of "embedded", "embeddedproofs": config.merkleBackend = mbEmbeddedProofs
|
||||
of "leveldb": config.merkleBackend = mbLevelDb
|
||||
of "packed": config.merkleBackend = mbPacked
|
||||
else: echo &"Unknown merkle backend: {backend}"; return
|
||||
elif arg.startsWith("--blocks="):
|
||||
let backend = arg[9..^1]
|
||||
case backend
|
||||
of "sharded": config.blockBackend = bbSharded
|
||||
of "packed": config.blockBackend = bbPacked
|
||||
else: echo &"Unknown block backend: {backend}"; return
|
||||
elif arg.startsWith("--blockmap="):
|
||||
let backend = arg[11..^1]
|
||||
case backend
|
||||
of "leveldb": config.blockmapBackend = bmLevelDb
|
||||
of "file": config.blockmapBackend = bmFile
|
||||
else: echo &"Unknown blockmap backend: {backend}"; return
|
||||
elif arg.startsWith("--io="):
|
||||
let mode = arg[5..^1]
|
||||
case mode
|
||||
of "direct": config.ioMode = ioDirect
|
||||
of "buffered": config.ioMode = ioBuffered
|
||||
else: echo &"Unknown IO mode: {mode}"; return
|
||||
elif arg.startsWith("--sync="):
|
||||
let value = arg[7..^1]
|
||||
if value == "none":
|
||||
config.syncBatchSize = 0
|
||||
elif value == "every":
|
||||
config.syncBatchSize = 1
|
||||
else:
|
||||
try:
|
||||
config.syncBatchSize = parseInt(value)
|
||||
except ValueError:
|
||||
echo &"Invalid sync batch size: {value}"; return
|
||||
elif arg.startsWith("--pool="):
|
||||
try:
|
||||
config.poolSize = max(2, parseInt(arg[7..^1]))
|
||||
except ValueError:
|
||||
echo &"Invalid pool size: {arg[7..^1]}"; return
|
||||
elif arg == "--synthetic":
|
||||
config.synthetic = true
|
||||
elif arg == "--help":
|
||||
printUsage()
|
||||
return
|
||||
|
||||
waitFor runBenchmark(config)
|
||||
|
||||
when isMainModule:
|
||||
main()
|
||||
tests/bench_merkle.nim (new file, 319 lines)
@@ -0,0 +1,319 @@
|
||||
import std/[os, times, strformat, random, options, strutils]
|
||||
import chronos
|
||||
import results
|
||||
import leveldbstatic as leveldb
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/blocks
|
||||
import ../blockstore/dataset
|
||||
import ../blockstore/merkle
|
||||
import ../blockstore/sha256
|
||||
import ../blockstore/cid
|
||||
|
||||
proc toHexStr(data: openArray[byte]): string =
|
||||
result = ""
|
||||
for b in data:
|
||||
result.add(b.toHex(2))
|
||||
|
||||
const
|
||||
DefaultChunkSize = 64 * 1024
|
||||
TestDir = "bench_merkle_streaming"
|
||||
DbPath = TestDir / "db"
|
||||
BlocksDir = TestDir / "blocks"
|
||||
TreesDir = TestDir / "trees"
|
||||
|
||||
type
|
||||
BenchConfig = object
|
||||
totalSize: uint64
|
||||
chunkSize: int
|
||||
backend: MerkleBackend
|
||||
storeBlocks: bool
|
||||
reportInterval: float
|
||||
|
||||
proc formatSize(bytes: uint64): string =
|
||||
if bytes >= 1024'u64 * 1024 * 1024 * 1024:
|
||||
&"{bytes.float / (1024 * 1024 * 1024 * 1024):.2f} TB"
|
||||
elif bytes >= 1024'u64 * 1024 * 1024:
|
||||
&"{bytes.float / (1024 * 1024 * 1024):.2f} GB"
|
||||
elif bytes >= 1024'u64 * 1024:
|
||||
&"{bytes.float / (1024 * 1024):.2f} MB"
|
||||
else:
|
||||
&"{bytes} bytes"
|
||||
|
||||
proc formatRate(bytesPerSec: float): string =
|
||||
if bytesPerSec >= 1024 * 1024 * 1024:
|
||||
&"{bytesPerSec / (1024 * 1024 * 1024):.2f} GB/s"
|
||||
elif bytesPerSec >= 1024 * 1024:
|
||||
&"{bytesPerSec / (1024 * 1024):.2f} MB/s"
|
||||
else:
|
||||
&"{bytesPerSec / 1024:.2f} KB/s"
|
||||
|
||||
proc cleanup() =
|
||||
if dirExists(TestDir):
|
||||
removeDir(TestDir)
|
||||
|
||||
proc runMerkleOnlyBenchmark(config: BenchConfig) =
|
||||
echo "=== Merkle Tree Only Benchmark ==="
|
||||
echo &"Simulated size: {formatSize(config.totalSize)}"
|
||||
echo &"Chunk size: {config.chunkSize div 1024} KB"
|
||||
echo &"Expected blocks: {config.totalSize div config.chunkSize.uint64}"
|
||||
echo &"Backend: {config.backend}"
|
||||
echo ""
|
||||
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(TreesDir)
|
||||
|
||||
let treeId = "bench_" & $epochTime().int
|
||||
var storage: MerkleStorage
|
||||
case config.backend
|
||||
of mbPacked:
|
||||
let treePath = TreesDir / (treeId & ".tree")
|
||||
storage = newPackedMerkleStorage(treePath, forWriting = true).get()
|
||||
of mbLevelDb:
|
||||
let db = leveldb.open(DbPath)
|
||||
storage = newLevelDbMerkleStorage(db, "bench")
|
||||
of mbEmbeddedProofs:
|
||||
echo "Embedded proofs backend not supported for this benchmark"
|
||||
return
|
||||
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var chunk = newSeq[byte](config.chunkSize)
|
||||
randomize()
|
||||
for i in 0 ..< config.chunkSize:
|
||||
chunk[i] = byte(rand(255))
|
||||
|
||||
echo "Building merkle tree..."
|
||||
let startTime = epochTime()
|
||||
var blockCount: uint64 = 0
|
||||
var processedBytes: uint64 = 0
|
||||
var lastReport = startTime
|
||||
var lastBytes: uint64 = 0
|
||||
let totalBlocks = config.totalSize div config.chunkSize.uint64
|
||||
|
||||
while processedBytes < config.totalSize:
|
||||
chunk[0] = byte(blockCount and 0xFF)
|
||||
chunk[1] = byte((blockCount shr 8) and 0xFF)
|
||||
chunk[2] = byte((blockCount shr 16) and 0xFF)
|
||||
chunk[3] = byte((blockCount shr 24) and 0xFF)
|
||||
|
||||
let leafHash = sha256Hash(chunk)
|
||||
let addResult = builder.addLeaf(leafHash)
|
||||
if addResult.isErr:
|
||||
echo &"Error adding leaf: {addResult.error.msg}"
|
||||
return
|
||||
|
||||
blockCount += 1
|
||||
processedBytes += config.chunkSize.uint64
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastReport >= config.reportInterval:
|
||||
let intervalBytes = processedBytes - lastBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastReport)
|
||||
let overallRate = processedBytes.float / (now - startTime)
|
||||
let progress = (blockCount.float / totalBlocks.float) * 100
|
||||
let eta = if overallRate > 0: (config.totalSize - processedBytes).float / overallRate else: 0.0
|
||||
echo &" Progress: {progress:.2f}% | Blocks: {blockCount}/{totalBlocks} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)}) | ETA: {eta:.0f}s"
|
||||
lastReport = now
|
||||
lastBytes = processedBytes
|
||||
|
||||
echo ""
|
||||
echo "Finalizing tree..."
|
||||
let finalizeStart = epochTime()
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
if rootResult.isErr:
|
||||
echo &"Finalize failed: {rootResult.error}"
|
||||
return
|
||||
|
||||
let root = rootResult.value
|
||||
let finalizeEnd = epochTime()
|
||||
|
||||
var treeFileSize: int64 = 0
|
||||
if config.backend == mbPacked:
|
||||
let treePath = TreesDir / (treeId & ".tree")
|
||||
treeFileSize = getFileSize(treePath)
|
||||
|
||||
discard storage.close()
|
||||
|
||||
let totalTime = finalizeEnd - startTime
|
||||
let buildTime = finalizeStart - startTime
|
||||
let finalizeTime = finalizeEnd - finalizeStart
|
||||
let overallRate = processedBytes.float / totalTime
|
||||
|
||||
echo ""
|
||||
echo "=== Results ==="
|
||||
echo &" Root hash: {toHexStr(root[0..7])}..."
|
||||
echo &" Total blocks: {blockCount}"
|
||||
echo &" Simulated data: {formatSize(processedBytes)}"
|
||||
echo &" Build time: {buildTime:.2f}s"
|
||||
echo &" Finalize time: {finalizeTime:.2f}s"
|
||||
echo &" Total time: {totalTime:.2f}s"
|
||||
echo &" Throughput: {formatRate(overallRate)}"
|
||||
echo &" Blocks/sec: {blockCount.float / totalTime:.0f}"
|
||||
if treeFileSize > 0:
|
||||
echo &" Tree file size: {formatSize(treeFileSize.uint64)}"
|
||||
echo &" Overhead: {treeFileSize.float / processedBytes.float * 100:.4f}%"
|
||||
echo ""
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runFullDatasetBenchmark(config: BenchConfig) {.async.} =
|
||||
echo "=== Full Dataset Benchmark ==="
|
||||
echo &"Simulated size: {formatSize(config.totalSize)}"
|
||||
echo &"Chunk size: {config.chunkSize div 1024} KB"
|
||||
echo &"Backend: {config.backend}"
|
||||
echo ""
|
||||
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, merkleBackend = config.backend)
|
||||
if storeResult.isErr:
|
||||
echo &"Failed to create store: {storeResult.error}"
|
||||
return
|
||||
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let builderResult = store.startDataset(config.chunkSize.uint32, some("benchmark"))
|
||||
if builderResult.isErr:
|
||||
echo &"Failed to start dataset: {builderResult.error}"
|
||||
return
|
||||
|
||||
var builder = builderResult.value
|
||||
|
||||
var chunk = newSeq[byte](config.chunkSize)
|
||||
randomize()
|
||||
for i in 0 ..< config.chunkSize:
|
||||
chunk[i] = byte(rand(255))
|
||||
|
||||
echo "Ingesting blocks..."
|
||||
let startTime = epochTime()
|
||||
var blockCount: uint64 = 0
|
||||
var processedBytes: uint64 = 0
|
||||
var lastReport = startTime
|
||||
var lastBytes: uint64 = 0
|
||||
|
||||
while processedBytes < config.totalSize:
|
||||
chunk[0] = byte(blockCount and 0xFF)
|
||||
chunk[1] = byte((blockCount shr 8) and 0xFF)
|
||||
chunk[2] = byte((blockCount shr 16) and 0xFF)
|
||||
chunk[3] = byte((blockCount shr 24) and 0xFF)
|
||||
|
||||
let blkResult = newBlock(chunk)
|
||||
if blkResult.isErr:
|
||||
echo &"Failed to create block: {blkResult.error}"
|
||||
return
|
||||
|
||||
let addResult = await builder.addBlock(blkResult.value)
|
||||
if addResult.isErr:
|
||||
echo &"Failed to add block: {addResult.error}"
|
||||
return
|
||||
|
||||
blockCount += 1
|
||||
processedBytes += config.chunkSize.uint64
|
||||
|
||||
let now = epochTime()
|
||||
if now - lastReport >= config.reportInterval:
|
||||
let intervalBytes = processedBytes - lastBytes
|
||||
let intervalRate = intervalBytes.float / (now - lastReport)
|
||||
let overallRate = processedBytes.float / (now - startTime)
|
||||
let progress = (processedBytes.float / config.totalSize.float) * 100
|
||||
echo &" Progress: {progress:.1f}% | Blocks: {blockCount} | Rate: {formatRate(intervalRate)} (avg: {formatRate(overallRate)})"
|
||||
lastReport = now
|
||||
lastBytes = processedBytes
|
||||
|
||||
echo ""
|
||||
echo "Finalizing dataset..."
|
||||
let finalizeStart = epochTime()
|
||||
|
||||
let datasetResult = await builder.finalize()
|
||||
if datasetResult.isErr:
|
||||
echo &"Failed to finalize: {datasetResult.error}"
|
||||
return
|
||||
|
||||
let dataset = datasetResult.value
|
||||
let totalTime = epochTime() - startTime
|
||||
let overallRate = processedBytes.float / totalTime
|
||||
|
||||
echo ""
|
||||
echo "=== Results ==="
|
||||
echo &" Manifest CID: {dataset.manifestCid}"
|
||||
echo &" Tree CID: {dataset.treeCid}"
|
||||
echo &" Total blocks: {dataset.blockCount}"
|
||||
echo &" Total time: {totalTime:.2f}s"
|
||||
echo &" Throughput: {formatRate(overallRate)}"
|
||||
echo &" Storage used: {formatSize(store.used())}"
|
||||
echo ""
|
||||
|
||||
cleanup()
|
||||
|
||||
proc printUsage() =
|
||||
echo "Usage: bench_merkle_streaming [options]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --size=<size> Dataset size (e.g., 1GB, 100GB, 1TB, 100TB)"
|
||||
echo " --chunk=<size> Chunk size in KB (default: 64)"
|
||||
echo " --backend=<type> Backend: packed, leveldb (default: packed)"
|
||||
echo " --full Run full dataset benchmark (with block storage)"
|
||||
echo " --help Show this help"
|
||||
|
||||
proc parseSize(s: string): uint64 =
|
||||
var num = s
|
||||
var multiplier: uint64 = 1
|
||||
|
||||
if s.endsWith("TB") or s.endsWith("tb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024 * 1024 * 1024
|
||||
elif s.endsWith("GB") or s.endsWith("gb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024 * 1024
|
||||
elif s.endsWith("MB") or s.endsWith("mb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64 * 1024
|
||||
elif s.endsWith("KB") or s.endsWith("kb"):
|
||||
num = s[0..^3]
|
||||
multiplier = 1024'u64
|
||||
|
||||
try:
|
||||
result = uint64(parseInt(num)) * multiplier
|
||||
except ValueError:
|
||||
result = 10'u64 * 1024 * 1024 * 1024
|
||||
|
||||
proc main() =
|
||||
var config = BenchConfig(
|
||||
totalSize: 10'u64 * 1024 * 1024 * 1024,
|
||||
chunkSize: DefaultChunkSize,
|
||||
backend: mbPacked,
|
||||
storeBlocks: false,
|
||||
reportInterval: 2.0
|
||||
)
|
||||
|
||||
var runFull = false
|
||||
|
||||
for arg in commandLineParams():
|
||||
if arg.startsWith("--size="):
|
||||
config.totalSize = parseSize(arg[7..^1])
|
||||
elif arg.startsWith("--chunk="):
|
||||
config.chunkSize = parseInt(arg[8..^1]) * 1024
|
||||
elif arg.startsWith("--backend="):
|
||||
let backend = arg[10..^1]
|
||||
case backend
|
||||
of "packed": config.backend = mbPacked
|
||||
of "leveldb": config.backend = mbLevelDb
|
||||
else: echo &"Unknown backend: {backend}"; return
|
||||
elif arg == "--full":
|
||||
runFull = true
|
||||
elif arg == "--help":
|
||||
printUsage()
|
||||
return
|
||||
|
||||
if runFull:
|
||||
waitFor runFullDatasetBenchmark(config)
|
||||
else:
|
||||
runMerkleOnlyBenchmark(config)
|
||||
|
||||
when isMainModule:
|
||||
main()
|
||||
tests/test_block.nim (new file, 90 lines)
@@ -0,0 +1,90 @@
|
||||
import std/unittest
|
||||
import results
|
||||
import libp2p/multicodec
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/cid
|
||||
import ../blockstore/blocks
|
||||
|
||||
suite "Block tests":
|
||||
test "block creation":
|
||||
let data = cast[seq[byte]]("hello world")
|
||||
let blockResult = newBlock(data)
|
||||
|
||||
check blockResult.isOk
|
||||
let b = blockResult.value
|
||||
check b.data == data
|
||||
check b.size == data.len
|
||||
|
||||
test "block verification":
|
||||
let data = cast[seq[byte]]("hello world")
|
||||
let blockResult = newBlock(data)
|
||||
check blockResult.isOk
|
||||
|
||||
let b = blockResult.value
|
||||
let verifyResult = b.verify()
|
||||
check verifyResult.isOk
|
||||
check verifyResult.value == true
|
||||
|
||||
test "block verification fails for corrupted data":
|
||||
let data = cast[seq[byte]]("verify me")
|
||||
let blockResult = newBlock(data)
|
||||
check blockResult.isOk
|
||||
|
||||
var b = blockResult.value
|
||||
b.data[0] = b.data[0] xor 1
|
||||
|
||||
let verifyResult = b.verify()
|
||||
check verifyResult.isOk
|
||||
check verifyResult.value == false
|
||||
|
||||
test "same data produces same CID":
|
||||
let data = cast[seq[byte]]("same_cid")
|
||||
let block1Result = newBlock(data)
|
||||
let block2Result = newBlock(data)
|
||||
|
||||
check block1Result.isOk
|
||||
check block2Result.isOk
|
||||
check block1Result.value.cid == block2Result.value.cid
|
||||
|
||||
test "different data produces different CID":
|
||||
let data1 = cast[seq[byte]]("data1")
|
||||
let data2 = cast[seq[byte]]("data2")
|
||||
let block1Result = newBlock(data1)
|
||||
let block2Result = newBlock(data2)
|
||||
|
||||
check block1Result.isOk
|
||||
check block2Result.isOk
|
||||
check block1Result.value.cid != block2Result.value.cid
|
||||
|
||||
test "CID has correct codec and hash":
|
||||
let data = cast[seq[byte]]("test data")
|
||||
let cidResult = computeCid(data)
|
||||
|
||||
check cidResult.isOk
|
||||
let c = cidResult.value
|
||||
check c.cidver == CIDv1
|
||||
check c.mcodec == LogosStorageBlock
|
||||
|
||||
test "CID string roundtrip":
|
||||
let data = cast[seq[byte]]("roundtrip test")
|
||||
let blockResult = newBlock(data)
|
||||
check blockResult.isOk
|
||||
|
||||
let c = blockResult.value.cid
|
||||
let cidStr = $c
|
||||
let parsedResult = cidFromString(cidStr)
|
||||
|
||||
check parsedResult.isOk
|
||||
check parsedResult.value == c
|
||||
|
||||
test "BlockMetadata creation":
|
||||
let data = cast[seq[byte]]("metadata test")
|
||||
let blockResult = newBlock(data)
|
||||
check blockResult.isOk
|
||||
|
||||
let b = blockResult.value
|
||||
let meta = newBlockMetadata(b.cid, b.size, 42)
|
||||
|
||||
check meta.size == b.size
|
||||
check meta.index == 42
|
||||
check meta.cid == $b.cid
|
||||
tests/test_chunker.nim (new file, 262 lines)
@@ -0,0 +1,262 @@
|
||||
import std/[unittest, os, options]
|
||||
import chronos
|
||||
import taskpools
|
||||
import results
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/blocks
|
||||
import ../blockstore/chunker
|
||||
|
||||
const testDir = getTempDir() / "nim_blockstore_test"
|
||||
|
||||
suite "Chunker tests":
|
||||
setup:
|
||||
createDir(testDir)
|
||||
|
||||
teardown:
|
||||
removeDir(testDir)
|
||||
|
||||
test "chunk small file":
|
||||
let testFile = testDir / "small.txt"
|
||||
let data = "hello world"
|
||||
writeFile(testFile, data)
|
||||
|
||||
let chunker = newSyncChunker()
|
||||
let iterResult = chunker.chunkFile(testFile)
|
||||
check iterResult.isOk
|
||||
|
||||
var iter = iterResult.value
|
||||
var blocks: seq[Block] = @[]
|
||||
|
||||
while true:
|
||||
let blockOpt = iter.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
check blockOpt.get().isOk
|
||||
blocks.add(blockOpt.get().value)
|
||||
|
||||
iter.close()
|
||||
|
||||
check blocks.len == 1
|
||||
check blocks[0].data == cast[seq[byte]](data)
|
||||
|
||||
test "chunk exact chunk size":
|
||||
let testFile = testDir / "exact.txt"
|
||||
let chunkSize = 1024
|
||||
var data = newSeq[byte](chunkSize)
|
||||
for i in 0 ..< chunkSize:
|
||||
data[i] = 42'u8
|
||||
writeFile(testFile, cast[string](data))
|
||||
|
||||
let config = newChunkerConfig(chunkSize)
|
||||
let chunker = newSyncChunker(config)
|
||||
let iterResult = chunker.chunkFile(testFile)
|
||||
check iterResult.isOk
|
||||
|
||||
var iter = iterResult.value
|
||||
var blocks: seq[Block] = @[]
|
||||
|
||||
while true:
|
||||
let blockOpt = iter.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
check blockOpt.get().isOk
|
||||
blocks.add(blockOpt.get().value)
|
||||
|
||||
iter.close()
|
||||
|
||||
check blocks.len == 1
|
||||
check blocks[0].data.len == chunkSize
|
||||
|
||||
test "chunk multiple chunks":
|
||||
let testFile = testDir / "multi.txt"
|
||||
let chunkSize = 1024
|
||||
let totalSize = chunkSize * 2 + 512
|
||||
var data = newSeq[byte](totalSize)
|
||||
for i in 0 ..< totalSize:
|
||||
data[i] = 42'u8
|
||||
writeFile(testFile, cast[string](data))
|
||||
|
||||
let config = newChunkerConfig(chunkSize)
|
||||
let chunker = newSyncChunker(config)
|
||||
let iterResult = chunker.chunkFile(testFile)
|
||||
check iterResult.isOk
|
||||
|
||||
var iter = iterResult.value
|
||||
var blocks: seq[Block] = @[]
|
||||
|
||||
while true:
|
||||
let blockOpt = iter.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
check blockOpt.get().isOk
|
||||
blocks.add(blockOpt.get().value)
|
||||
|
||||
iter.close()
|
||||
|
||||
check blocks.len == 3
|
||||
check blocks[0].data.len == chunkSize
|
||||
check blocks[1].data.len == chunkSize
|
||||
check blocks[2].data.len == 512
|
||||
|
||||
test "chunk empty file":
|
||||
let testFile = testDir / "empty.txt"
|
||||
writeFile(testFile, "")
|
||||
|
||||
let chunker = newSyncChunker()
|
||||
let iterResult = chunker.chunkFile(testFile)
|
||||
check iterResult.isOk
|
||||
|
||||
var iter = iterResult.value
|
||||
let blockOpt = iter.nextBlock()
|
||||
iter.close()
|
||||
|
||||
check blockOpt.isNone
|
||||
|
||||
test "unique block CIDs":
|
||||
let testFile = testDir / "unique.txt"
|
||||
writeFile(testFile, "aaaaaaaaaabbbbbbbbbb")
|
||||
|
||||
let config = newChunkerConfig(10)
|
||||
let chunker = newSyncChunker(config)
|
||||
let iterResult = chunker.chunkFile(testFile)
|
||||
check iterResult.isOk
|
||||
|
||||
var iter = iterResult.value
|
||||
var blocks: seq[Block] = @[]
|
||||
|
||||
while true:
|
||||
let blockOpt = iter.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
check blockOpt.get().isOk
|
||||
blocks.add(blockOpt.get().value)
|
||||
|
||||
iter.close()
|
||||
|
||||
check blocks.len == 2
|
||||
check blocks[0].cid != blocks[1].cid
|
||||
|
||||
test "chunkData helper":
|
||||
let data = cast[seq[byte]]("hello world, this is a test of chunking")
|
||||
let chunkSize = 10
|
||||
let blocksResults = chunkData(data, chunkSize)
|
||||
|
||||
check blocksResults.len == 4
|
||||
|
||||
for br in blocksResults:
|
||||
check br.isOk
|
||||
|
||||
check blocksResults[^1].value.data.len == 9
|
||||
|
||||
test "file not found error":
|
||||
let chunker = newSyncChunker()
|
||||
let iterResult = chunker.chunkFile("/nonexistent/file.txt")
|
||||
|
||||
check iterResult.isErr
|
||||
check iterResult.error.kind == IoError
|
||||
|
||||
proc readBlocksAsync(pool: Taskpool, filePath: string): Future[seq[Block]] {.async.} =
|
||||
let chunker = newAsyncChunker(pool)
|
||||
let streamResult = await chunker.chunkFile(filePath)
|
||||
doAssert streamResult.isOk
|
||||
var stream = streamResult.value
|
||||
result = @[]
|
||||
while true:
|
||||
let blockOpt = await stream.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
doAssert blockOpt.get().isOk
|
||||
result.add(blockOpt.get().value)
|
||||
stream.close()
|
||||
|
||||
proc readBlocksAsyncWithConfig(pool: Taskpool, filePath: string, chunkSize: int): Future[seq[Block]] {.async.} =
|
||||
let config = newChunkerConfig(chunkSize)
|
||||
let chunker = newAsyncChunker(pool, config)
|
||||
let streamResult = await chunker.chunkFile(filePath)
|
||||
doAssert streamResult.isOk
|
||||
var stream = streamResult.value
|
||||
result = @[]
|
||||
while true:
|
||||
let blockOpt = await stream.nextBlock()
|
||||
if blockOpt.isNone:
|
||||
break
|
||||
doAssert blockOpt.get().isOk
|
||||
result.add(blockOpt.get().value)
|
||||
stream.close()
|
||||
|
||||
proc readTwoFilesAsync(pool: Taskpool, file1, file2: string): Future[(Block, Block)] {.async.} =
|
||||
let chunker1 = newAsyncChunker(pool)
|
||||
let chunker2 = newAsyncChunker(pool)
|
||||
|
||||
let stream1Result = await chunker1.chunkFile(file1)
|
||||
let stream2Result = await chunker2.chunkFile(file2)
|
||||
doAssert stream1Result.isOk
|
||||
doAssert stream2Result.isOk
|
||||
|
||||
var stream1 = stream1Result.value
|
||||
var stream2 = stream2Result.value
|
||||
|
||||
let block1Opt = await stream1.nextBlock()
|
||||
let block2Opt = await stream2.nextBlock()
|
||||
doAssert block1Opt.isSome
|
||||
doAssert block2Opt.isSome
|
||||
doAssert block1Opt.get().isOk
|
||||
doAssert block2Opt.get().isOk
|
||||
|
||||
stream1.close()
|
||||
stream2.close()
|
||||
return (block1Opt.get().value, block2Opt.get().value)
|
||||
|
||||
proc openNonexistentAsync(pool: Taskpool): Future[BResult[AsyncChunkStream]] {.async.} =
|
||||
let chunker = newAsyncChunker(pool)
|
||||
return await chunker.chunkFile("/nonexistent/async_file.txt")
|
||||
|
||||
suite "Async Chunker tests":
|
||||
var pool: Taskpool
|
||||
|
||||
setup:
|
||||
createDir(testDir)
|
||||
pool = Taskpool.new(numThreads = 2)
|
||||
|
||||
teardown:
|
||||
pool.shutdown()
|
||||
removeDir(testDir)
|
||||
|
||||
test "async chunk small file":
|
||||
let testFile = testDir / "async_small.txt"
|
||||
let data = "hello async world"
|
||||
writeFile(testFile, data)
|
||||
|
||||
let blocks = waitFor readBlocksAsync(pool, testFile)
|
||||
check blocks.len == 1
|
||||
check blocks[0].data == cast[seq[byte]](data)
|
||||
|
||||
test "async chunk multiple chunks":
|
||||
let testFile = testDir / "async_multi.txt"
|
||||
let chunkSize = 1024
|
||||
let totalSize = chunkSize * 3 + 256
|
||||
var data = newSeq[byte](totalSize)
|
||||
for i in 0 ..< totalSize:
|
||||
data[i] = byte(i mod 256)
|
||||
writeFile(testFile, cast[string](data))
|
||||
|
||||
let blocks = waitFor readBlocksAsyncWithConfig(pool, testFile, chunkSize)
|
||||
check blocks.len == 4
|
||||
check blocks[0].data.len == chunkSize
|
||||
check blocks[1].data.len == chunkSize
|
||||
check blocks[2].data.len == chunkSize
|
||||
check blocks[3].data.len == 256
|
||||
|
||||
test "async shared pool across chunkers":
|
||||
let testFile1 = testDir / "shared1.txt"
|
||||
let testFile2 = testDir / "shared2.txt"
|
||||
writeFile(testFile1, "file one content")
|
||||
writeFile(testFile2, "file two content")
|
||||
|
||||
let (block1, block2) = waitFor readTwoFilesAsync(pool, testFile1, testFile2)
|
||||
check block1.cid != block2.cid
|
||||
|
||||
test "async file not found":
|
||||
let streamResult = waitFor openNonexistentAsync(pool)
|
||||
check streamResult.isErr
|
||||
check streamResult.error.kind == IoError
|
||||
tests/test_dataset.nim (new file, 332 lines)
@@ -0,0 +1,332 @@
|
||||
import std/[unittest, os, options]
|
||||
import chronos
|
||||
import results
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/cid
|
||||
import ../blockstore/blocks
|
||||
import ../blockstore/dataset
|
||||
import ../blockstore/blockmap
|
||||
|
||||
const
|
||||
TestDir = getTempDir() / "nim_blockstore_dataset_test"
|
||||
DbPath = TestDir / "db"
|
||||
BlocksDir = TestDir / "blocks"
|
||||
|
||||
proc cleanup() =
|
||||
if dirExists(TestDir):
|
||||
removeDir(TestDir)
|
||||
|
||||
proc createTestDataset(store: DatasetStore, blockCount: int, chunkSize: int = 4096): Future[BResult[Dataset]] {.async.} =
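## Builds a dataset of `blockCount` deterministic chunks through the builder
## API (startDataset -> addBlock -> finalize) and returns the finalized Dataset.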
|
||||
let builderResult = store.startDataset(chunkSize.uint32, some("test"))
|
||||
if builderResult.isErr:
|
||||
return err(builderResult.error)
|
||||
|
||||
var builder = builderResult.value
|
||||
|
||||
for i in 0 ..< blockCount:
|
||||
var data = newSeq[byte](chunkSize)
|
||||
for j in 0 ..< chunkSize:
|
||||
data[j] = byte((i * chunkSize + j) mod 256)
|
||||
|
||||
let blkResult = newBlock(data)
|
||||
if blkResult.isErr:
|
||||
return err(blkResult.error)
|
||||
|
||||
let addResult = await builder.addBlock(blkResult.value)
|
||||
if addResult.isErr:
|
||||
return err(addResult.error)
|
||||
|
||||
return await builder.finalize()
|
||||
|
||||
proc runDeleteExistingDataset() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir)
|
||||
doAssert storeResult.isOk, "Failed to create store: " & $storeResult.error
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 5)
|
||||
doAssert datasetResult.isOk, "Failed to create dataset: " & $datasetResult.error
|
||||
let dataset = datasetResult.value
|
||||
|
||||
let manifestCid = dataset.manifestCid
|
||||
|
||||
let getResult1 = await store.getDataset(dataset.treeCid)
|
||||
doAssert getResult1.isOk
|
||||
doAssert getResult1.value.isSome, "Dataset should exist before deletion"
|
||||
|
||||
let deleteResult = await store.deleteDataset(manifestCid)
|
||||
doAssert deleteResult.isOk, "Delete should succeed: " & $deleteResult.error
|
||||
|
||||
let getResult2 = await store.getDataset(dataset.treeCid)
|
||||
doAssert getResult2.isOk
|
||||
doAssert getResult2.value.isNone, "Dataset should not exist after deletion"
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runDeleteNonExistentDataset() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let fakeCidResult = cidFromString("bagazuayseaka5yn4pfmebc7bqkkoij6wb5x3o4jlvzq7flqhd63qalnrskwvy")
|
||||
doAssert fakeCidResult.isOk
|
||||
let fakeCid = fakeCidResult.value
|
||||
|
||||
let deleteResult = await store.deleteDataset(fakeCid)
|
||||
doAssert deleteResult.isErr, "Delete should fail for non-existent dataset"
|
||||
doAssert deleteResult.error.kind == DatasetNotFound
|
||||
|
||||
cleanup()
|
||||
|
||||
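deleteDataset reports a typed error here; a sketch of how a caller might branch on it, assuming the error object carries a kind discriminator as used throughout these tests (DatasetNotFound here, IoError in the chunker tests):

proc deleteIfPresent(store: DatasetStore, manifestCid: Cid): Future[bool] {.async.} =
  ## True when a dataset was deleted, false when no dataset matched the CID.
  let res = await store.deleteDataset(manifestCid)
  if res.isOk:
    return true
  if res.error.kind == DatasetNotFound:
    return false
  doAssert false, "unexpected deleteDataset error: " & $res.error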
proc runStorageReleasedAfterDeletion() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let usedBefore = store.used()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 10, 4096)
|
||||
doAssert datasetResult.isOk
|
||||
let dataset = datasetResult.value
|
||||
|
||||
let usedAfterCreate = store.used()
|
||||
doAssert usedAfterCreate > usedBefore, "Storage should increase after adding dataset"
|
||||
|
||||
let deleteResult = await store.deleteDataset(dataset.manifestCid)
|
||||
doAssert deleteResult.isOk
|
||||
|
||||
# Wait for the deletion worker to process the pending deletions; poll for up
# to 500 ms (10 x 50 ms) before comparing the usage counters.
|
||||
for _ in 0 ..< 10:
|
||||
await sleepAsync(50.milliseconds)
|
||||
if store.used() < usedAfterCreate:
|
||||
break
|
||||
|
||||
let usedAfterDelete = store.used()
|
||||
doAssert usedAfterDelete < usedAfterCreate, "Storage should decrease after deletion"
|
||||
|
||||
cleanup()
|
||||
|
||||
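The fixed sleep-and-check loop above can be factored into a small polling helper; a sketch that relies only on chronos sleepAsync (the eventually name and its defaults are illustrative):

proc eventually(cond: proc (): bool,
                attempts = 10,
                interval = 50.milliseconds): Future[bool] {.async.} =
  ## Polls cond up to attempts times, sleeping interval between failed checks.
  for _ in 0 ..< attempts:
    if cond():
      return true
    await sleepAsync(interval)
  return cond()

# Example use inside runStorageReleasedAfterDeletion:
#   doAssert await eventually(proc (): bool = store.used() < usedAfterCreate)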
proc runMultipleDatasetsDeletion() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let dataset1Result = await createTestDataset(store, 3, 4096)
|
||||
doAssert dataset1Result.isOk
|
||||
let dataset1 = dataset1Result.value
|
||||
|
||||
let dataset2Result = await createTestDataset(store, 4, 4096)
|
||||
doAssert dataset2Result.isOk
|
||||
let dataset2 = dataset2Result.value
|
||||
|
||||
let get1Before = await store.getDataset(dataset1.treeCid)
|
||||
let get2Before = await store.getDataset(dataset2.treeCid)
|
||||
doAssert get1Before.isOk and get1Before.value.isSome
|
||||
doAssert get2Before.isOk and get2Before.value.isSome
|
||||
|
||||
let delete1Result = await store.deleteDataset(dataset1.manifestCid)
|
||||
doAssert delete1Result.isOk
|
||||
|
||||
let get1After = await store.getDataset(dataset1.treeCid)
|
||||
let get2After = await store.getDataset(dataset2.treeCid)
|
||||
doAssert get1After.isOk and get1After.value.isNone, "Dataset 1 should be deleted"
|
||||
doAssert get2After.isOk and get2After.value.isSome, "Dataset 2 should still exist"
|
||||
|
||||
let delete2Result = await store.deleteDataset(dataset2.manifestCid)
|
||||
doAssert delete2Result.isOk
|
||||
|
||||
let get2Final = await store.getDataset(dataset2.treeCid)
|
||||
doAssert get2Final.isOk and get2Final.value.isNone, "Dataset 2 should be deleted"
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runDeleteDatasetWithManyBlocks() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 100, 4096)
|
||||
doAssert datasetResult.isOk
|
||||
let dataset = datasetResult.value
|
||||
|
||||
doAssert dataset.blockCount == 100
|
||||
|
||||
let deleteResult = await store.deleteDataset(dataset.manifestCid)
|
||||
doAssert deleteResult.isOk, "Delete should succeed for dataset with many blocks"
|
||||
|
||||
let getResult = await store.getDataset(dataset.treeCid)
|
||||
doAssert getResult.isOk and getResult.value.isNone
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runMappedBlockmapBasic() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile)
|
||||
doAssert storeResult.isOk, "Failed to create store with mapped blockmap: " & $storeResult.error
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 10)
|
||||
doAssert datasetResult.isOk, "Failed to create dataset with mapped blockmap: " & $datasetResult.error
|
||||
let dataset = datasetResult.value
|
||||
|
||||
doAssert dataset.blockCount == 10
|
||||
doAssert dataset.completed() == 10
|
||||
|
||||
for i in 0 ..< 10:
|
||||
let blockResult = await dataset.getBlock(i)
|
||||
doAssert blockResult.isOk
|
||||
doAssert blockResult.value.isSome
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runMappedBlockmapRanges() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 20)
|
||||
doAssert datasetResult.isOk
|
||||
let dataset = datasetResult.value
|
||||
|
||||
let ranges = dataset.getBlockmapRanges()
|
||||
doAssert ranges.len >= 1, "Expected at least one range"
|
||||
|
||||
var totalBlocks: uint64 = 0
|
||||
for r in ranges:
|
||||
totalBlocks += r.count
|
||||
doAssert totalBlocks == 20, "Expected 20 blocks in ranges"
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runMappedBlockmapPersistence() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
var treeCid: Cid
|
||||
block:
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
|
||||
let datasetResult = await createTestDataset(store, 15)
|
||||
doAssert datasetResult.isOk
|
||||
treeCid = datasetResult.value.treeCid
|
||||
|
||||
store.close()
|
||||
|
||||
block:
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let getResult = await store.getDataset(treeCid)
|
||||
doAssert getResult.isOk
|
||||
doAssert getResult.value.isSome, "Dataset should persist after reopen"
|
||||
|
||||
let dataset = getResult.value.get()
|
||||
doAssert dataset.blockCount == 15
|
||||
doAssert dataset.completed() == 15
|
||||
|
||||
cleanup()
|
||||
|
||||
proc runMappedBlockmapDeletion() {.async.} =
|
||||
cleanup()
|
||||
createDir(TestDir)
|
||||
createDir(BlocksDir)
|
||||
|
||||
let storeResult = newDatasetStore(DbPath, BlocksDir, blockmapBackend = bmFile)
|
||||
doAssert storeResult.isOk
|
||||
let store = storeResult.value
|
||||
defer: store.close()
|
||||
|
||||
let datasetResult = await createTestDataset(store, 5)
|
||||
doAssert datasetResult.isOk
|
||||
let dataset = datasetResult.value
|
||||
|
||||
let manifestCid = dataset.manifestCid
|
||||
|
||||
let deleteResult = await store.deleteDataset(manifestCid)
|
||||
doAssert deleteResult.isOk
|
||||
|
||||
let getResult = await store.getDataset(dataset.treeCid)
|
||||
doAssert getResult.isOk
|
||||
doAssert getResult.value.isNone, "Dataset should not exist after deletion"
|
||||
|
||||
cleanup()
|
||||
|
||||
suite "Dataset deletion tests":
|
||||
setup:
|
||||
cleanup()
|
||||
|
||||
teardown:
|
||||
cleanup()
|
||||
|
||||
test "delete existing dataset":
|
||||
waitFor runDeleteExistingDataset()
|
||||
|
||||
test "delete non-existent dataset returns error":
|
||||
waitFor runDeleteNonExistentDataset()
|
||||
|
||||
test "storage released after deletion":
|
||||
waitFor runStorageReleasedAfterDeletion()
|
||||
|
||||
test "delete one dataset doesn't affect others":
|
||||
waitFor runMultipleDatasetsDeletion()
|
||||
|
||||
test "delete dataset with many blocks":
|
||||
waitFor runDeleteDatasetWithManyBlocks()
|
||||
|
||||
suite "Mapped blockmap backend tests":
|
||||
setup:
|
||||
cleanup()
|
||||
|
||||
teardown:
|
||||
cleanup()
|
||||
|
||||
test "basic dataset operations with mapped blockmap":
|
||||
waitFor runMappedBlockmapBasic()
|
||||
|
||||
test "blockmap ranges work with mapped backend":
|
||||
waitFor runMappedBlockmapRanges()
|
||||
|
||||
test "mapped blockmap persists across reopens":
|
||||
waitFor runMappedBlockmapPersistence()
|
||||
|
||||
test "mapped blockmap files deleted with dataset":
|
||||
waitFor runMappedBlockmapDeletion()
|
||||
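The two suites above drive the same DatasetStore API with different blockmap backends: the default one and the file-backed bmFile backend selected through the optional blockmapBackend parameter. A minimal sketch of opening a store each way (paths are illustrative and the directories are assumed to exist, as in the setup procs above):

# Default blockmap backend: only the metadata DB path and blocks directory are given.
let defaultRes = newDatasetStore("/tmp/example_default/db", "/tmp/example_default/blocks")
doAssert defaultRes.isOk

# File-backed ("mapped") blockmap, as exercised by the suite above.
let mappedRes = newDatasetStore("/tmp/example_mapped/db", "/tmp/example_mapped/blocks",
                                blockmapBackend = bmFile)
doAssert mappedRes.isOk

defaultRes.value.close()
mappedRes.value.close()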
745
tests/test_merkle.nim
Normal file
@@ -0,0 +1,745 @@
|
||||
import std/[unittest, os, options, sets, syncio, strutils]
|
||||
import results
|
||||
import leveldbstatic as leveldb
|
||||
import libp2p/multicodec
|
||||
import ../blockstore/errors
|
||||
import ../blockstore/cid
|
||||
import ../blockstore/merkle
|
||||
import ../blockstore/sha256
|
||||
|
||||
const
|
||||
TestDbPath = "/tmp/test_merkle_db"
|
||||
TestPackedPath = "/tmp/test_merkle_packed.tree"
|
||||
|
||||
proc cleanup() =
|
||||
if dirExists(TestDbPath):
|
||||
removeDir(TestDbPath)
|
||||
if fileExists(TestPackedPath):
|
||||
removeFile(TestPackedPath)
|
||||
|
||||
suite "MerkleTreeBuilder tests":
|
||||
test "tree builder basic":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
|
||||
builder.addBlock(cast[seq[byte]]("block1"))
|
||||
builder.addBlock(cast[seq[byte]]("block2"))
|
||||
builder.addBlock(cast[seq[byte]]("block3"))
|
||||
|
||||
check builder.blockCount == 3
|
||||
|
||||
builder.buildTree()
|
||||
let rootCidResult = builder.rootCid()
|
||||
check rootCidResult.isOk
|
||||
|
||||
let rootCid = rootCidResult.value
|
||||
check rootCid.cidver == CIDv1
|
||||
check rootCid.mcodec == LogosStorageTree
|
||||
|
||||
test "single block proof":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
let blockData = cast[seq[byte]]("hello world")
|
||||
builder.addBlock(blockData)
|
||||
builder.buildTree()
|
||||
|
||||
let rootCidResult = builder.rootCid()
|
||||
check rootCidResult.isOk
|
||||
let rootCid = rootCidResult.value
|
||||
|
||||
let proofResult = builder.getProof(0)
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
|
||||
let mhResult = rootCid.mhash()
|
||||
check mhResult.isOk
|
||||
let mh = mhResult.get()
|
||||
let rootBytes = mh.data.buffer[mh.dpos .. mh.dpos + mh.size - 1]
|
||||
let verifyResult = proof.verify(rootBytes, blockData)
|
||||
check verifyResult.isOk
|
||||
check verifyResult.value == true
|
||||
|
||||
test "proof fails for wrong data":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
let blockData = cast[seq[byte]]("hello world")
|
||||
builder.addBlock(blockData)
|
||||
builder.buildTree()
|
||||
|
||||
let rootCidResult = builder.rootCid()
|
||||
check rootCidResult.isOk
|
||||
let rootCid = rootCidResult.value
|
||||
|
||||
let proofResult = builder.getProof(0)
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
|
||||
let wrongData = cast[seq[byte]]("wrong data")
|
||||
let mhResult = rootCid.mhash()
|
||||
check mhResult.isOk
|
||||
let mh = mhResult.get()
|
||||
let rootBytes = mh.data.buffer[mh.dpos .. mh.dpos + mh.size - 1]
|
||||
let verifyResult = proof.verify(rootBytes, wrongData)
|
||||
check verifyResult.isOk
|
||||
check verifyResult.value == false
|
||||
|
||||
test "deterministic root":
|
||||
var builder1 = newMerkleTreeBuilder()
|
||||
builder1.addBlock(cast[seq[byte]]("a"))
|
||||
builder1.addBlock(cast[seq[byte]]("b"))
|
||||
builder1.buildTree()
|
||||
|
||||
var builder2 = newMerkleTreeBuilder()
|
||||
builder2.addBlock(cast[seq[byte]]("a"))
|
||||
builder2.addBlock(cast[seq[byte]]("b"))
|
||||
builder2.buildTree()
|
||||
|
||||
check builder1.rootCid().value == builder2.rootCid().value
|
||||
|
||||
test "proof structure for 4-leaf tree":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.addBlock(cast[seq[byte]]("a"))
|
||||
builder.addBlock(cast[seq[byte]]("b"))
|
||||
builder.addBlock(cast[seq[byte]]("c"))
|
||||
builder.addBlock(cast[seq[byte]]("d"))
|
||||
builder.buildTree()
|
||||
|
||||
let proof = builder.getProof(1).value
|
||||
|
||||
check proof.index == 1
|
||||
check proof.leafCount == 4
|
||||
check proof.path.len == 2
|
||||
|
||||
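Taken together, the checks above amount to the following round trip; a condensed sketch that uses only calls appearing in this suite, including the root-byte extraction via mhash:

var builder = newMerkleTreeBuilder()
let payload = cast[seq[byte]]("example block")
builder.addBlock(payload)
builder.buildTree()

let treeRootCid = builder.rootCid().value      # CIDv1 with the LogosStorageTree codec
let proof = builder.getProof(0).value

# Recover the raw root hash bytes from the CID's multihash, as the tests do.
let mh = treeRootCid.mhash().get()
let rootBytes = mh.data.buffer[mh.dpos .. mh.dpos + mh.size - 1]

doAssert proof.verify(rootBytes, payload).value          # original data verifies
doAssert not proof.verify(rootBytes, cast[seq[byte]]("tampered")).value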
suite "Streaming Merkle Storage tests":
|
||||
setup:
|
||||
cleanup()
|
||||
|
||||
teardown:
|
||||
cleanup()
|
||||
|
||||
test "computeNumLevels":
|
||||
check computeNumLevels(0) == 0
|
||||
check computeNumLevels(1) == 1
|
||||
check computeNumLevels(2) == 2
|
||||
check computeNumLevels(3) == 3
|
||||
check computeNumLevels(4) == 3
|
||||
check computeNumLevels(5) == 4
|
||||
check computeNumLevels(8) == 4
|
||||
check computeNumLevels(16) == 5
|
||||
|
||||
test "nodesAtLevel":
|
||||
check nodesAtLevel(4, 0) == 4
|
||||
check nodesAtLevel(4, 1) == 2
|
||||
check nodesAtLevel(4, 2) == 1
|
||||
|
||||
check nodesAtLevel(5, 0) == 5
|
||||
check nodesAtLevel(5, 1) == 3
|
||||
check nodesAtLevel(5, 2) == 2
|
||||
check nodesAtLevel(5, 3) == 1
|
||||
|
||||
test "computeNumLevels edge cases":
|
||||
check computeNumLevels(1'u64 shl 20) == 21 # 1M leaves -> 21 levels
|
||||
check computeNumLevels(1'u64 shl 30) == 31 # 1B leaves -> 31 levels
|
||||
check computeNumLevels(1'u64 shl 40) == 41 # 1T leaves -> 41 levels
|
||||
check computeNumLevels(1'u64 shl 50) == 51
|
||||
check computeNumLevels(1'u64 shl 60) == 61
|
||||
check computeNumLevels(1'u64 shl 63) == 64 # 2^63 leaves -> 64 levels
|
||||
|
||||
check computeNumLevels((1'u64 shl 20) + 1) == 22
|
||||
check computeNumLevels((1'u64 shl 30) + 1) == 32
|
||||
check computeNumLevels((1'u64 shl 63) + 1) == 65
|
||||
|
||||
check computeNumLevels(high(uint64)) == 65 # 2^64 - 1 -> 65 levels
|
||||
|
||||
test "nodesAtLevel edge cases":
|
||||
let bigLeafCount = 1'u64 shl 40 # 1 trillion leaves
|
||||
check nodesAtLevel(bigLeafCount, 0) == bigLeafCount
|
||||
check nodesAtLevel(bigLeafCount, 10) == 1'u64 shl 30
|
||||
check nodesAtLevel(bigLeafCount, 20) == 1'u64 shl 20
|
||||
check nodesAtLevel(bigLeafCount, 40) == 1 # root level
|
||||
|
||||
let oddLeafCount = (1'u64 shl 40) + 7
|
||||
check nodesAtLevel(oddLeafCount, 0) == oddLeafCount
|
||||
check nodesAtLevel(oddLeafCount, 1) == (oddLeafCount + 1) shr 1
|
||||
check nodesAtLevel(oddLeafCount, 40) == 2
|
||||
check nodesAtLevel(oddLeafCount, 41) == 1
|
||||
|
||||
let maxLeaves = high(uint64)
|
||||
check nodesAtLevel(maxLeaves, 0) == maxLeaves
|
||||
check nodesAtLevel(maxLeaves, 63) == 2
|
||||
check nodesAtLevel(maxLeaves, 64) == 1 # root
|
||||
|
||||
check nodesAtLevel(3, 1) == 2
|
||||
check nodesAtLevel(7, 2) == 2
|
||||
check nodesAtLevel(9, 3) == 2
|
||||
check nodesAtLevel(17, 4) == 2
|
||||
|
||||
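Every assertion above is consistent with ceiling-halving: nodesAtLevel(n, k) is n halved k times with rounding up, and computeNumLevels counts halvings until a single root remains (zero leaves giving zero levels). Reference implementations of that arithmetic, which may differ from the library's actual code:

proc ceilHalf(n: uint64): uint64 =
  (n shr 1) + (n and 1)

proc nodesAtLevelRef(leafCount: uint64, level: int): uint64 =
  result = leafCount
  for _ in 0 ..< level:
    result = ceilHalf(result)

proc computeNumLevelsRef(leafCount: uint64): int =
  if leafCount == 0:
    return 0
  var n = leafCount
  result = 1
  while n > 1:
    n = ceilHalf(n)
    inc result

doAssert nodesAtLevelRef(5, 1) == 3 and computeNumLevelsRef(5) == 4   # as asserted above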
test "hashConcat deterministic":
|
||||
var h1, h2: MerkleHash
|
||||
for i in 0 ..< 32:
|
||||
h1[i] = byte(i)
|
||||
h2[i] = byte(i + 32)
|
||||
|
||||
let result1 = hashConcat(h1, h2)
|
||||
let result2 = hashConcat(h1, h2)
|
||||
check result1 == result2
|
||||
|
||||
let result3 = hashConcat(h2, h1)
|
||||
check result1 != result3
|
||||
|
||||
test "LevelDB streaming builder - single leaf":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "tree1")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
let leafHash = sha256Hash(cast[seq[byte]]("block0"))
|
||||
discard builder.addLeaf(leafHash)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
check root == leafHash
|
||||
check builder.leafCount == 1
|
||||
|
||||
test "LevelDB streaming builder - two leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "tree2")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
let h0 = sha256Hash(cast[seq[byte]]("block0"))
|
||||
let h1 = sha256Hash(cast[seq[byte]]("block1"))
|
||||
discard builder.addLeaf(h0)
|
||||
discard builder.addLeaf(h1)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let expected = hashConcat(h0, h1)
|
||||
check root == expected
|
||||
|
||||
test "LevelDB streaming builder - four leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "tree4")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 4:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let left = hashConcat(hashes[0], hashes[1])
|
||||
let right = hashConcat(hashes[2], hashes[3])
|
||||
let expected = hashConcat(left, right)
|
||||
check root == expected
|
||||
|
||||
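The one-, two- and four-leaf expectations pin down the node rule: a parent is hashConcat(left, right), and a lone leaf is its own root. A reference root computation consistent with those checks; promoting an unpaired node unchanged at odd levels is an assumption these particular tests do not cover:

proc merkleRootRef(leaves: seq[MerkleHash]): MerkleHash =
  ## Pairs nodes level by level with hashConcat; an unpaired last node is
  ## carried up unchanged (assumption, see note above).
  doAssert leaves.len > 0
  var level = leaves
  while level.len > 1:
    var next: seq[MerkleHash]
    var i = 0
    while i + 1 < level.len:
      next.add hashConcat(level[i], level[i + 1])
      i += 2
    if i < level.len:
      next.add level[i]
    level = next
  result = level[0]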
test "LevelDB reader and proof generation":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "treeProof")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 4:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == 4
|
||||
|
||||
let rootOpt = reader.root()
|
||||
check rootOpt.isSome
|
||||
check rootOpt.get() == root
|
||||
|
||||
let proofResult = reader.getProof(1)
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
|
||||
check proof.index == 1
|
||||
check proof.leafCount == 4
|
||||
check proof.path.len == 2
|
||||
|
||||
check verify(proof, root, hashes[1])
|
||||
check not verify(proof, root, hashes[0])
|
||||
|
||||
test "LevelDB proof for all leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "treeAllProofs")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 8:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
|
||||
for i in 0 ..< 8:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
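Unlike MerkleTreeBuilder, the streaming path takes pre-hashed leaves (sha256Hash) and persists levels through a storage backend, so proofs can be regenerated later without the builder. A condensed round trip using the same calls as the tests above (the database path is illustrative):

let db = leveldb.open("/tmp/example_merkle_db")
defer: db.close()

let storage = newLevelDbMerkleStorage(db, "exampleTree")
var builder = newStreamingMerkleBuilder(storage)

var leafHashes: seq[MerkleHash]
for i in 0 ..< 6:
  let h = sha256Hash(cast[seq[byte]]("payload" & $i))
  leafHashes.add(h)
  discard builder.addLeaf(h)

let root = builder.finalize().value

# Proofs can be regenerated later from the same storage, without the builder.
let reader = newMerkleReader(storage)
doAssert reader.leafCount == 6
doAssert verify(reader.getProof(3).value, root, leafHashes[3])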
test "Packed storage - basic write and read":
|
||||
let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get()
|
||||
defer: discard storage.close()
|
||||
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 4:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let (leafCount, numLevels) = storage.getMetadata()
|
||||
check leafCount == 4
|
||||
check numLevels == 3
|
||||
|
||||
test "Packed storage - read after close":
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get()
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
for i in 0 ..< 4:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
discard builder.finalize()
|
||||
discard storage.close()
|
||||
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath).get()
|
||||
defer: discard storage.close()
|
||||
|
||||
let (leafCount, numLevels) = storage.getMetadata()
|
||||
check leafCount == 4
|
||||
check numLevels == 3
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == 4
|
||||
|
||||
let rootOpt = reader.root()
|
||||
check rootOpt.isSome
|
||||
|
||||
test "Packed storage - proof verification":
|
||||
var hashes: seq[MerkleHash]
|
||||
var root: MerkleHash
|
||||
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get()
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
for i in 0 ..< 8:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
root = rootResult.value
|
||||
discard storage.close()
|
||||
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath).get()
|
||||
defer: discard storage.close()
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
|
||||
for i in 0 ..< 8:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
test "Non-power-of-two leaves - 5 leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "tree5")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 5:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == 5
|
||||
|
||||
for i in 0 ..< 5:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
test "Non-power-of-two leaves - 7 leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "tree7")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< 7:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
|
||||
for i in 0 ..< 7:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
test "Large tree - 1000 leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
const numLeaves = 1000
|
||||
let storage = newLevelDbMerkleStorage(db, "tree1000")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< numLeaves:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == numLeaves
|
||||
|
||||
let testIndices = @[0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
||||
990, 991, 992, 993, 994, 995, 996, 997, 998, 999,
|
||||
100, 250, 500, 750, 333, 666, 512, 511, 513]
|
||||
for i in testIndices:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
test "Large tree - 997 leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
const numLeaves = 997
|
||||
let storage = newLevelDbMerkleStorage(db, "tree997")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< numLeaves:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == numLeaves
|
||||
|
||||
for i in 0 ..< numLeaves:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
if not verify(proof, root, hashes[i]):
|
||||
echo "Proof verification failed for leaf ", i
|
||||
check false
|
||||
|
||||
test "Large tree - 1024 leaves":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
const numLeaves = 1024
|
||||
let storage = newLevelDbMerkleStorage(db, "tree1024")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
var hashes: seq[MerkleHash]
|
||||
for i in 0 ..< numLeaves:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == numLeaves
|
||||
|
||||
let testIndices = @[0, 1, 2, 511, 512, 513, 1022, 1023,
|
||||
256, 768, 128, 384, 640, 896]
|
||||
for i in testIndices:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
check verify(proof, root, hashes[i])
|
||||
|
||||
test "Large packed storage - 500000 leaves":
|
||||
const numLeaves = 500000
|
||||
var hashes: seq[MerkleHash]
|
||||
var root: MerkleHash
|
||||
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath, forWriting = true).get()
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
for i in 0 ..< numLeaves:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
hashes.add(h)
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
root = rootResult.value
|
||||
discard storage.close()
|
||||
|
||||
block:
|
||||
let storage = newPackedMerkleStorage(TestPackedPath).get()
|
||||
defer: discard storage.close()
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
check reader.leafCount == numLeaves
|
||||
|
||||
for i in 0 ..< numLeaves:
|
||||
let proofResult = reader.getProof(uint64(i))
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
if not verify(proof, root, hashes[i]):
|
||||
echo "Packed proof verification failed for leaf ", i
|
||||
doAssert false
|
||||
|
||||
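For scale: with 32-byte hashes and every level stored densely, a tree over n leaves holds roughly 2n nodes, so the 500000-leaf file above is on the order of 2 x 500000 x 32 bytes, about 32 MB, plus a small fixed header. A rough estimate sketch; dense per-level storage and the 20-byte header size (matching the fields written in the error-case tests below) are assumptions:

proc packedSizeEstimate(leafCount: uint64,
                        hashLen = 32'u64, headerLen = 20'u64): uint64 =
  ## Rough on-disk size assuming every tree level is stored densely.
  if leafCount == 0:
    return headerLen
  var n = leafCount
  var nodes = 0'u64
  while n > 1:
    nodes += n
    n = (n shr 1) + (n and 1)
  nodes += 1                                   # the root
  result = headerLen + nodes * hashLen

doAssert packedSizeEstimate(500_000) > 31_000_000'u64   # roughly 32 MB for the test above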
test "Empty tree finalize fails":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "emptyTree")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isErr
|
||||
|
||||
test "Invalid proof index":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "treeInvalid")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
for i in 0 ..< 4:
|
||||
let h = sha256Hash(cast[seq[byte]]("block" & $i))
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
discard builder.finalize()
|
||||
|
||||
let reader = newMerkleReader(storage)
|
||||
let proofResult = reader.getProof(10)
|
||||
check proofResult.isErr
|
||||
|
||||
suite "PackedMerkleStorage error cases":
|
||||
setup:
|
||||
cleanup()
|
||||
|
||||
teardown:
|
||||
cleanup()
|
||||
|
||||
test "Invalid magic in packed file":
|
||||
# Create a file whose magic bytes are wrong but whose header is otherwise complete
|
||||
let f = syncio.open(TestPackedPath, fmWrite)
|
||||
var wrongMagic: uint32 = 0xDEADBEEF'u32
|
||||
var version: uint32 = 2
|
||||
var leafCount: uint64 = 0
|
||||
var numLevels: int32 = 0
|
||||
discard f.writeBuffer(addr wrongMagic, 4)
|
||||
discard f.writeBuffer(addr version, 4)
|
||||
discard f.writeBuffer(addr leafCount, 8)
|
||||
discard f.writeBuffer(addr numLevels, 4)
|
||||
f.close()
|
||||
|
||||
let res = newPackedMerkleStorage(TestPackedPath, forWriting = false)
|
||||
check res.isErr
|
||||
check res.error.msg == "Invalid packed merkle file magic"
|
||||
|
||||
test "Unsupported version in packed file":
|
||||
# Create a file with the correct magic but an unsupported version
|
||||
let f = syncio.open(TestPackedPath, fmWrite)
|
||||
var magic: uint32 = 0x534B4C4D'u32 # PackedMagic
|
||||
var wrongVersion: uint32 = 99
|
||||
var leafCount: uint64 = 0
|
||||
var numLevels: int32 = 0
|
||||
discard f.writeBuffer(addr magic, 4)
|
||||
discard f.writeBuffer(addr wrongVersion, 4)
|
||||
discard f.writeBuffer(addr leafCount, 8)
|
||||
discard f.writeBuffer(addr numLevels, 4)
|
||||
f.close()
|
||||
|
||||
let res = newPackedMerkleStorage(TestPackedPath, forWriting = false)
|
||||
check res.isErr
|
||||
check "Unsupported packed merkle file version" in res.error.msg
|
||||
|
||||
test "File too small for header":
|
||||
# Create a file that is too small to contain the full header
|
||||
let f = syncio.open(TestPackedPath, fmWrite)
|
||||
var magic: uint32 = 0x534B4C4D'u32
|
||||
discard f.writeBuffer(addr magic, 4)
|
||||
f.close()
|
||||
|
||||
let res = newPackedMerkleStorage(TestPackedPath, forWriting = false)
|
||||
check res.isErr
|
||||
check res.error.msg == "File too small for header"
|
||||
|
||||
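The three error cases above imply a 20-byte header: a uint32 magic (0x534B4C4D), a uint32 format version, a uint64 leaf count and an int32 level count, written in native byte order. A sketch that writes such a header the same way these tests do; the version constant is an assumption (the invalid-magic test writes 2), and the real format presumably stores hash data after the header:

const
  PackedMagic = 0x534B4C4D'u32     # same value the tests write
  PackedVersionGuess = 2'u32       # assumed current version; the invalid-magic test writes 2

proc writePackedHeader(path: string, leafCount: uint64, numLevels: int32) =
  ## Writes only the 20-byte header, mirroring the raw writeBuffer calls above.
  let f = syncio.open(path, fmWrite)
  defer: f.close()
  var magic = PackedMagic
  var version = PackedVersionGuess
  var lc = leafCount
  var nl = numLevels
  discard f.writeBuffer(addr magic, 4)
  discard f.writeBuffer(addr version, 4)
  discard f.writeBuffer(addr lc, 8)
  discard f.writeBuffer(addr nl, 4)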
suite "MerkleTreeBuilder edge cases":
|
||||
test "root() returns none when not built":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.addBlock(cast[seq[byte]]("block1"))
|
||||
let rootOpt = builder.root()
|
||||
check rootOpt.isNone
|
||||
|
||||
test "rootCid() fails when not built":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.addBlock(cast[seq[byte]]("block1"))
|
||||
let cidResult = builder.rootCid()
|
||||
check cidResult.isErr
|
||||
check "Tree not built" in cidResult.error.msg
|
||||
|
||||
test "getProof() fails when not built":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.addBlock(cast[seq[byte]]("block1"))
|
||||
let proofResult = builder.getProof(0)
|
||||
check proofResult.isErr
|
||||
check "Tree not built" in proofResult.error.msg
|
||||
|
||||
test "addBlock after buildTree raises Defect":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.addBlock(cast[seq[byte]]("block1"))
|
||||
builder.buildTree()
|
||||
|
||||
var raised = false
|
||||
try:
|
||||
builder.addBlock(cast[seq[byte]]("block2"))
|
||||
except Defect:
|
||||
raised = true
|
||||
check raised
|
||||
|
||||
test "buildTree on empty builder does nothing":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
builder.buildTree()
|
||||
let rootOpt = builder.root()
|
||||
check rootOpt.isNone
|
||||
|
||||
suite "Proof verification edge cases":
|
||||
test "verify() with wrong root length returns error":
|
||||
var builder = newMerkleTreeBuilder()
|
||||
let blockData = cast[seq[byte]]("hello world")
|
||||
builder.addBlock(blockData)
|
||||
builder.buildTree()
|
||||
|
||||
let proofResult = builder.getProof(0)
|
||||
check proofResult.isOk
|
||||
let proof = proofResult.value
|
||||
|
||||
let
|
||||
shortRoot: array[16, byte] = default(array[16, byte])
|
||||
verifyResult = proof.verify(shortRoot, blockData)
|
||||
check verifyResult.isErr
|
||||
|
||||
suite "rootToCid function":
|
||||
setup:
|
||||
cleanup()
|
||||
|
||||
teardown:
|
||||
cleanup()
|
||||
|
||||
test "rootToCid converts hash to valid CID":
|
||||
let db = leveldb.open(TestDbPath)
|
||||
defer: db.close()
|
||||
|
||||
let storage = newLevelDbMerkleStorage(db, "rootToCidTest")
|
||||
var builder = newStreamingMerkleBuilder(storage)
|
||||
|
||||
let h = sha256Hash(cast[seq[byte]]("block0"))
|
||||
discard builder.addLeaf(h)
|
||||
|
||||
let rootResult = builder.finalize()
|
||||
check rootResult.isOk
|
||||
let root = rootResult.value
|
||||
|
||||
let cidResult = rootToCid(root)
|
||||
check cidResult.isOk
|
||||
let cid = cidResult.value
|
||||
check cid.cidver == CIDv1
|
||||
check cid.mcodec == LogosStorageTree
|
||||
|
||||
suite "getRequiredLeafIndices function":
|
||||
test "single leaf at start":
|
||||
let res = getRequiredLeafIndices(0, 1, 4)
|
||||
check 1 in res
|
||||
|
||||
test "single leaf in middle":
|
||||
let res = getRequiredLeafIndices(2, 1, 4)
|
||||
check 3 in res
|
||||
|
||||
test "consecutive pair - no extra leaves needed at first level":
|
||||
let res = getRequiredLeafIndices(0, 2, 4)
|
||||
check 2 in res
|
||||
check 3 in res
|
||||
|
||||
test "full range - no extra leaves needed":
|
||||
let res = getRequiredLeafIndices(0, 4, 4)
|
||||
check res.len == 0
|
||||
|
||||
test "larger tree - partial range":
|
||||
let res = getRequiredLeafIndices(0, 3, 8)
|
||||
check 3 in res
|
||||
check 4 in res
|
||||
check 5 in res
|
||||
check 6 in res
|
||||
check 7 in res
|
||||
|
||||
test "non-power-of-two total leaves":
|
||||
let res = getRequiredLeafIndices(0, 2, 5)
|
||||
check 2 in res
|
||||
check 3 in res
|
||||
check 4 in res
|
||||