nim-codex/codex/chunker.nim
Bulat-Ziganshin f24ded0f76
Download files without padding (#218)
The initial goal of this patch was to allow downloading a file via the REST API at exactly the same size as it was uploaded, which required adding the fields Chunker.offset and Manifest.originalBytes to keep track of that size. On top of that, we added more integrity checks to operations on Manifest and reorganized TestNode.nim to test the actual interaction between the node.store and node.retrieve operations. A minimal sketch of the padding-trimming idea is included after the change list below.

Note that the wire format of Manifest was changed, so we need to recreate all BlockStores.

* Download without padding
* Fixed chunker tests
* Chunker: get rid of RabinChunker
* Verify offset in the chunker tests
* Use manifest.originalBytesPadded in StoreStream.size
* StoreStream: replace emptyBlock with zeroMem
* Manifest.bytes: compute how many bytes the corresponding StoreStream(Manifest, pad) will return
* Manifest: verify originalBytes and originalLen on new/encode/decode
Also set originalBytes in each Manifest creation/update scenario
* Manifest: comments, split code into sections
* Reordered parameters to deal with int64 size in 32-bit builds
* TestNode.nim: combine Store and Retrieve tests
1. Instead of copy-pasting code from node.nim, the new test calls node.store() and node.retrieve() in order to check that they can correctly store and then retrieve data
2. The new test compares only file contents; manifest contents are considered an implementation detail
3. The new test chunks at an odd chunkSize = BlockSize/1.618 in order to ensure that data is retrieved correctly even when buffer sizes mismatch
* TestNode.nim: code refactoring
* Manifest.add: one more test
* Manifest.verify: return Result instead of raising Defect
* Node.store: added blockSize parameter
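
For illustration only, here is a minimal sketch of the padding-trimming idea; the names manifest, stream, and writeToClient are hypothetical and the real REST handler and StoreStream code differ:

    # Hypothetical sketch: send back exactly manifest.originalBytes bytes,
    # dropping the zero padding that was added to fill the last block.
    var remaining = manifest.originalBytes
    while remaining > 0:
      var buf = newSeq[byte](min(remaining, DefaultChunkSize))
      let n = await stream.readOnce(addr buf[0], buf.len)  # padded StoreStream
      if n <= 0:
        break
      await writeToClient(buf[0 ..< n])                    # hypothetical sender
      remaining -= n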
2022-08-24 15:15:59 +03:00


## Nim-Codex
## Copyright (c) 2021 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.

# TODO: This is super inefficient and needs a rewrite, but it'll do for now

import pkg/upraises

push: {.upraises: [].}

import pkg/chronicles
import pkg/questionable
import pkg/questionable/results
import pkg/chronos
import pkg/libp2p except shuffle

import ./blocktype
export blocktype

const
  DefaultChunkSize* = BlockSize

type
  # default reader type
  ChunkBuffer* = ptr UncheckedArray[byte]
  Reader* = proc(data: ChunkBuffer, len: int): Future[int] {.gcsafe, raises: [Defect].}

  # Reader that splits input data into fixed-size chunks
  Chunker* = ref object
    reader*: Reader     # Procedure called to actually read the data
    offset*: int        # Bytes read so far (position in the stream)
    chunkSize*: Natural # Size of each chunk
    pad*: bool          # Pad last chunk to chunkSize?

  FileChunker* = Chunker
  LPStreamChunker* = Chunker

proc getBytes*(c: Chunker): Future[seq[byte]] {.async.} =
  ## Returns the next chunk of bytes from
  ## the instantiated chunker
  ##

  var buff = newSeq[byte](c.chunkSize)
  let read = await c.reader(cast[ChunkBuffer](addr buff[0]), buff.len)

  if read <= 0:
    return @[]

  c.offset += read

  # Without padding, trim the last (short) chunk to the bytes actually read;
  # with padding, the rest of the zero-initialized buffer is returned as-is.
  if not c.pad and buff.len > read:
    buff.setLen(read)

  return buff

func new*(
  T: type Chunker,
  reader: Reader,
  chunkSize = DefaultChunkSize,
  pad = true): T =

  T(reader: reader,
    offset: 0,
    chunkSize: chunkSize,
    pad: pad)

proc new*(
  T: type LPStreamChunker,
  stream: LPStream,
  chunkSize = DefaultChunkSize,
  pad = true): T =
  ## create the default LPStream chunker
  ##

  proc reader(data: ChunkBuffer, len: int): Future[int]
      {.gcsafe, async, raises: [Defect].} =
    var res = 0
    try:
      while res < len:
        res += await stream.readOnce(addr data[res], len - res)
    except LPStreamEOFError as exc:
      trace "LPStreamChunker stream Eof", exc = exc.msg
    except CatchableError as exc:
      trace "CatchableError exception", exc = exc.msg
      raise newException(Defect, exc.msg)

    return res

  T.new(
    reader = reader,
    chunkSize = chunkSize,
    pad = pad)

proc new*(
  T: type FileChunker,
  file: File,
  chunkSize = DefaultChunkSize,
  pad = true): T =
  ## create the default File chunker
  ##

  proc reader(data: ChunkBuffer, len: int): Future[int]
      {.gcsafe, async, raises: [Defect].} =
    var total = 0
    try:
      while total < len:
        let res = file.readBuffer(addr data[total], len - total)
        if res <= 0:
          break

        total += res
    except IOError as exc:
      trace "Exception reading file", exc = exc.msg
    except CatchableError as exc:
      trace "CatchableError exception", exc = exc.msg
      raise newException(Defect, exc.msg)

    return total

  T.new(
    reader = reader,
    chunkSize = chunkSize,
    pad = pad)
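
when isMainModule:
  # Usage sketch only (not part of the patch): chunk a file from disk without
  # padding and report how many bytes were consumed. "example.bin" is a
  # placeholder path; error handling is omitted for brevity.
  proc example() {.async.} =
    let file = open("example.bin")
    defer: file.close()

    let chunker = FileChunker.new(file = file, pad = false)
    while true:
      let chunk = await chunker.getBytes()
      if chunk.len == 0:
        break
      echo "read chunk of ", chunk.len, " bytes"

    # With pad = false, offset equals the exact number of bytes in the file
    echo "total bytes read: ", chunker.offset

  waitFor example()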