nim-codex/codex/slots/sampler/sampler.nim

## Nim-Codex
## Copyright (c) 2023 Status Research & Development GmbH
## Licensed under either of
##  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
##  * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.

import std/sugar
import std/sequtils

import pkg/chronos
import pkg/questionable
import pkg/questionable/results
import pkg/constantine/math/arithmetic
import pkg/poseidon2
import pkg/poseidon2/types
import pkg/poseidon2/io
import pkg/stew/arrayops

import ../../logutils
import ../../market
import ../../blocktype as bt
import ../../merkletree
import ../../manifest
import ../../stores

import ../builder

import ./utils

logScope:
  topics = "codex datasampler"

type
  Cell* = seq[byte] ## Raw bytes of a single cell cut out of a block.

  Sample* = object
    ## One sampled cell together with the two Merkle proofs that go with it.
    data*: Cell
      ## Bytes of the sampled cell; all zeroes when the cell lies in padding.
    slotProof*: Poseidon2Proof
      ## Proof fetched from the block store for `slotBlockIdx` in the slot tree.
    cellProof*: Poseidon2Proof
      ## Proof for `blockCellIdx` taken from the block tree (or from the
      ## builder's empty digest tree for padded cells).
    slotBlockIdx*: Natural
      ## Index of the block, within the slot, that holds the sampled cell.
    blockCellIdx*: Natural
      ## Index of the sampled cell within that block.

  ProofInput* = object
    ## Everything `getProofInput` gathers as input for the proving circuit.
    entropy*: Poseidon2Hash
      ## Proof challenge, truncated to its first 31 bytes.
    verifyRoot*: Poseidon2Hash
      ## Root of the builder's verify tree.
    verifyProof*: Poseidon2Proof
      ## Proof for `slotIndex` taken from the verify tree.
    numSlots*: Natural
      ## Copied from `builder.numSlots`.
    numCells*: Natural
      ## Copied from `builder.numSlotCells` (the unpadded cell count).
    slotIndex*: Natural
      ## Index of the slot that was sampled.
    samples*: seq[Sample]
      ## One entry per sampled cell index.

  DataSampler* = ref object of RootObj
    ## Produces `ProofInput` for a single slot.
    index: Natural
      ## Index of the slot this sampler works on; range-checked in `new`.
    blockStore: BlockStore
      ## Store queried for slot-tree proofs.
    # The following data is invariant over time for a given slot:
    builder: SlotsBuilder
      ## Supplies slot roots, cell/block geometry and the block trees.

proc new*(
    T: type DataSampler,
    index: Natural,
    blockStore: BlockStore,
    builder: SlotsBuilder): ?!DataSampler =
  ## Creates a sampler bound to the slot at `index`.
  ##
  ## Returns a failure when `index` lies past the last entry of
  ## `builder.slotRoots`; otherwise wraps the three arguments unchanged.

  let lastSlot = builder.slotRoots.high

  # Happy path first: the index addresses an existing slot root.
  if index <= lastSlot:
    return success(DataSampler(
      index: index,
      blockStore: blockStore,
      builder: builder))

  error "Slot index is out of range"
  return failure("Slot index is out of range")

proc getCell*(self: DataSampler, blkBytes: seq[byte], blkCellIdx: Natural): Cell =
  ## Copies the cell at position `blkCellIdx` out of the block bytes.
  ##
  ## Cells are `self.builder.cellSize` bytes wide and sit back to back in
  ## `blkBytes`, so cell `i` occupies bytes `i * cellSize ..< (i + 1) * cellSize`.
  ## No bounds check is done here beyond what the slice operator performs.
  let
    width = self.builder.cellSize.uint64
    first = blkCellIdx.uint64 * width

  blkBytes[first ..< (first + width)]

proc createProofSample(self: DataSampler, slotTreeCid: Cid, cellIdx: Natural): Future[?!Sample] {.async.} =
  ## Builds the `Sample` for the slot-level cell index `cellIdx`.
  ##
  ## The slot proof always comes from the block store. The cell data and cell
  ## proof come either from the builder's empty digest tree (padded cells) or
  ## from a freshly built block tree (cells backed by dataset blocks).
  ## Any failing step is logged and returned as a failure.
  let
    blockCells = self.builder.numBlockCells
    cellInBlock = cellIdx.toBlockCellIdx(blockCells)
    blockInSlot = cellIdx.toBlockIdx(blockCells)

  logScope:
    cellIdx = cellIdx
    slotBlkIdx = blockInSlot
    blkCellIdx = cellInBlock

  # Only the proof is needed below; the CID that comes with it is not used.
  without (cid, proof) =? await self.blockStore.getCidAndProof(
    slotTreeCid,
    blockInSlot.Natural), e:
    error "Failed to get block from block store", err = e.msg
    return failure(e)

  without slotProof =? proof.toVerifiableProof(), e:
    error "Unable to convert slot proof to poseidon proof", error = e.msg
    return failure(e)

  # A cell index at or past the UNPADDED slot cell count falls inside a padded
  # block: answer with zeroed data and the proof from the pre-generated
  # empty digest tree.
  if cellIdx >= self.builder.numSlotCells:
    trace "Sampling a padded block"

    without paddingProof =? self.builder.emptyDigestTree.getProof(cellInBlock), e:
      error "Failed to get proof from empty block tree", err = e.msg
      return failure(e)

    let zeroCell = newSeq[byte](self.builder.cellSize.int)
    return success(Sample(
      data: zeroCell,
      slotProof: slotProof,
      cellProof: paddingProof,
      slotBlockIdx: blockInSlot.Natural,
      blockCellIdx: cellInBlock.Natural))

  trace "Sampling a dataset block"
  # Block data cannot be fetched via slot tree + slot block index, so map the
  # slot block index to a dataset block index using the builder's
  # indexing strategy.
  let datasetBlockIndex = self.builder.slotIndicies(self.index)[blockInSlot]

  without (bytes, blkTree) =? await self.builder.buildBlockTree(datasetBlockIndex), e:
    error "Failed to build block tree", err = e.msg
    return failure(e)

  without dataProof =? blkTree.getProof(cellInBlock), e:
    error "Failed to get proof from block tree", err = e.msg
    return failure(e)

  let cellBytes = self.getCell(bytes, cellInBlock)
  return success(Sample(
    data: cellBytes,
    slotProof: slotProof,
    cellProof: dataProof,
    slotBlockIdx: blockInSlot.Natural,
    blockCellIdx: cellInBlock.Natural))

proc getProofInput*(
  self: DataSampler,
  entropy: ProofChallenge,
  nSamples: Natural): Future[?!ProofInput] {.async.} =
  ## Gathers the input for the proving circuit.
  ##
  ## Derives `nSamples` cell indices from `entropy` and the slot root, builds a
  ## `Sample` for each one, and bundles them with the verify-tree root and
  ## proof for this sampler's slot. The first failing step is logged and
  ## returned as a failure.

  let
    # Truncate to 31 bytes, otherwise the value _might_ be greater than mod.
    truncated = array[31, byte].initCopyFrom(entropy[0..30])
    entropyHash = Poseidon2Hash.fromBytes(truncated)

  without verifyTree =? self.builder.verifyTree and
    verifyProof =? verifyTree.getProof(self.index) and
    verifyRoot =? verifyTree.root(), e:
    error "Failed to get slot proof from verify tree", err = e.msg
    return failure(e)

  let slotTreeCid = self.builder.manifest.slotRoots[self.index]

  logScope:
    index = self.index
    samples = nSamples
    slotTreeCid = slotTreeCid

  trace "Collecting input for proof"

  # Indices are drawn over the padded cell count, so some may land in padding.
  let
    slotRoot = self.builder.slotRoots[self.index]
    paddedCells = self.builder.numSlotCellsPadded
    cellIdxs = entropyHash.cellIndices(slotRoot, paddedCells, nSamples)

  trace "Found cell indices", cellIdxs

  var gathered: seq[Sample]
  for cellIdx in cellIdxs:
    without sample =? (await self.createProofSample(slotTreeCid, cellIdx)), e:
      error "Failed to create proof sample", error = e.msg
      return failure(e)
    gathered.add(sample)

  return success(ProofInput(
    entropy: entropyHash,
    verifyRoot: verifyRoot,
    verifyProof: verifyProof,
    numSlots: self.builder.numSlots,
    numCells: self.builder.numSlotCells,
    slotIndex: self.index,
    samples: gathered))
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00			`## Nim-Codex`
			`## Copyright (c) 2023 Status Research & Development GmbH`
			`## Licensed under either of`
			`## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))`
			`## * MIT license ([LICENSE-MIT](LICENSE-MIT))`
			`## at your option.`
			`## This file may not be copied, modified, or distributed except according to`
			`## those terms.`

			`import std/sugar`
			`import std/sequtils`

			`import pkg/chronos`
			`import pkg/questionable`
			`import pkg/questionable/results`
			`import pkg/constantine/math/arithmetic`
			`import pkg/poseidon2`
			`import pkg/poseidon2/types`
			`import pkg/poseidon2/io`
			`import pkg/stew/arrayops`

feat: create logging proxy (#663) * implement a logging proxy The logging proxy: - prevents the need to import chronicles (as well as export except toJson), - prevents the need to override `writeValue` or use or import nim-json-seralization elsewhere in the codebase, allowing for sole use of utils/json for de/serialization, - and handles json formatting correctly in chronicles json sinks * Rename logging -> logutils to avoid ambiguity with common names * clean up * add setProperty for JsonRecord, remove nim-json-serialization conflict * Allow specifying textlines and json format separately Not specifying a LogFormat will apply the formatting to both textlines and json sinks. Specifying a LogFormat will apply the formatting to only that sink. * remove unneeded usages of std/json We only need to import utils/json instead of std/json * move serialization from rest/json to utils/json so it can be shared * fix NoColors ambiguity Was causing unit tests to fail on Windows. * Remove nre usage to fix Windows error Windows was erroring with `could not load: pcre64.dll`. Instead of fixing that error, remove the pcre usage :) * Add logutils module doc * Shorten logutils.formatIt for `NBytes` Both json and textlines formatIt were not needed, and could be combined into one formatIt * remove debug integration test config debug output and logformat of json for integration test logs * Use ## module doc to support docgen * bump nim-poseidon2 to export fromBytes Before the changes in this branch, fromBytes was likely being resolved by nim-stew, or other dependency. With the changes in this branch, that dependency was removed and fromBytes could no longer be resolved. By exporting fromBytes from nim-poseidon, the correct resolution is now happening. * fixes to get compiling after rebasing master * Add support for Result types being logged using formatIt 2024-01-23 07:35:03 +00:00			`import ../../logutils`
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00			`import ../../market`
			`import ../../blocktype as bt`
			`import ../../merkletree`
			`import ../../manifest`
			`import ../../stores`

			`import ../builder`

			`import ./utils`

			`logScope:`
			`topics = "codex datasampler"`

			`type`
			`Cell* = seq[byte]`

			`Sample* = object`
			`data*: Cell`
			`slotProof*: Poseidon2Proof`
			`cellProof*: Poseidon2Proof`
			`slotBlockIdx*: Natural`
			`blockCellIdx*: Natural`

			`ProofInput* = object`
			`entropy*: Poseidon2Hash`
			`verifyRoot*: Poseidon2Hash`
			`verifyProof*: Poseidon2Proof`
			`numSlots*: Natural`
			`numCells*: Natural`
			`slotIndex*: Natural`
			`samples*: seq[Sample]`

			`DataSampler* = ref object of RootObj`
			`index: Natural`
			`blockStore: BlockStore`
			`# The following data is invariant over time for a given slot:`
			`builder: SlotsBuilder`

			`proc new*(`
			`T: type DataSampler,`
			`index: Natural,`
			`blockStore: BlockStore,`
			`builder: SlotsBuilder): ?!DataSampler =`

			`if index > builder.slotRoots.high:`
			`error "Slot index is out of range"`
			`return failure("Slot index is out of range")`

			`success DataSampler(`
			`index: index,`
			`blockStore: blockStore,`
			`builder: builder)`

			`proc getCell*(self: DataSampler, blkBytes: seq[byte], blkCellIdx: Natural): Cell =`
			`let`
			`cellSize = self.builder.cellSize.uint64`
			`dataStart = cellSize * blkCellIdx.uint64`
			`dataEnd = dataStart + cellSize`
			`return blkBytes[dataStart ..< dataEnd]`

Debug/sampling (#681) * Extra logging in sampler * wip: Fixing sampling issue in padded slot cells * Cleanup * Restores tests 2024-01-25 23:34:38 +00:00			`proc createProofSample(self: DataSampler, slotTreeCid: Cid, cellIdx: Natural): Future[?!Sample] {.async.} =`
			`let`
			`cellsPerBlock = self.builder.numBlockCells`
			`blkCellIdx = cellIdx.toBlockCellIdx(cellsPerBlock)`
			`slotBlkIdx = cellIdx.toBlockIdx(cellsPerBlock)`

			`logScope:`
			`cellIdx = cellIdx`
			`slotBlkIdx = slotBlkIdx`
			`blkCellIdx = blkCellIdx`

			`without (cid, proof) =? await self.blockStore.getCidAndProof(`
			`slotTreeCid,`
			`slotBlkIdx.Natural), err:`
			`error "Failed to get block from block store", err = err.msg`
			`return failure(err)`

			`without slotProof =? proof.toVerifiableProof(), err:`
			`error "Unable to convert slot proof to poseidon proof", error = err.msg`
			`return failure(err)`

			`# If the cell index is greater than or equal to the UNPADDED number of slot cells,`
			`# Then we're sampling inside a padded block.`
			`# In this case, we use the pre-generated zero-data and pre-generated padding-proof for this cell index.`
			`if cellIdx >= self.builder.numSlotCells:`
			`trace "Sampling a padded block"`

			`without blockProof =? self.builder.emptyDigestTree.getProof(blkCellIdx), err:`
			`error "Failed to get proof from empty block tree", err = err.msg`
			`return failure(err)`

			`success(Sample(`
			`data: newSeq[byte](self.builder.cellSize.int),`
			`slotProof: slotProof,`
			`cellProof: blockProof,`
			`slotBlockIdx: slotBlkIdx.Natural,`
			`blockCellIdx: blkCellIdx.Natural))`

			`else:`
			`trace "Sampling a dataset block"`
			`# This converts our slotBlockIndex to a datasetBlockIndex using the`
			`# indexing-strategy used by the builder.`
			`# We need this to fetch the block data. We can't do it by slotTree + slotBlkIdx.`
			`let datasetBlockIndex = self.builder.slotIndicies(self.index)[slotBlkIdx]`

			`without (bytes, blkTree) =? await self.builder.buildBlockTree(datasetBlockIndex), err:`
			`error "Failed to build block tree", err = err.msg`
			`return failure(err)`

			`without blockProof =? blkTree.getProof(blkCellIdx), err:`
			`error "Failed to get proof from block tree", err = err.msg`
			`return failure(err)`

			`success(Sample(`
			`data: self.getCell(bytes, blkCellIdx),`
			`slotProof: slotProof,`
			`cellProof: blockProof,`
			`slotBlockIdx: slotBlkIdx.Natural,`
			`blockCellIdx: blkCellIdx.Natural))`

Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00			`proc getProofInput*(`
			`self: DataSampler,`
			`entropy: ProofChallenge,`
			`nSamples: Natural): Future[?!ProofInput] {.async.} =`
			`## Generate proofs as input to the proving circuit.`
			`##`

			`let`
			`entropy = Poseidon2Hash.fromBytes(`
			`array[31, byte].initCopyFrom(entropy[0..30])) # truncate to 31 bytes, otherwise it _might_ be greater than mod`

			`without verifyTree =? self.builder.verifyTree and`
			`verifyProof =? verifyTree.getProof(self.index) and`
			`verifyRoot =? verifyTree.root(), err:`
			`error "Failed to get slot proof from verify tree", err = err.msg`
			`return failure(err)`

Debug/sampling (#681) * Extra logging in sampler * wip: Fixing sampling issue in padded slot cells * Cleanup * Restores tests 2024-01-25 23:34:38 +00:00			`let slotTreeCid = self.builder.manifest.slotRoots[self.index]`
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00
			`logScope:`
			`index = self.index`
			`samples = nSamples`
			`slotTreeCid = slotTreeCid`

			`trace "Collecting input for proof"`

Debug/sampling (#681) * Extra logging in sampler * wip: Fixing sampling issue in padded slot cells * Cleanup * Restores tests 2024-01-25 23:34:38 +00:00			`let cellIdxs = entropy.cellIndices(`
			`self.builder.slotRoots[self.index],`
			`self.builder.numSlotCellsPadded,`
			`nSamples)`
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00
Debug/sampling (#681) * Extra logging in sampler * wip: Fixing sampling issue in padded slot cells * Cleanup * Restores tests 2024-01-25 23:34:38 +00:00			`trace "Found cell indices", cellIdxs`
			`let samples = collect(newSeq):`
			`for cellIdx in cellIdxs:`
			`without sample =? (await self.createProofSample(slotTreeCid, cellIdx)), err:`
			`error "Failed to create proof sample", error = err.msg`
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00			`return failure(err)`
Debug/sampling (#681) * Extra logging in sampler * wip: Fixing sampling issue in padded slot cells * Cleanup * Restores tests 2024-01-25 23:34:38 +00:00			`sample`
Wire sampler (#676) * Setting up testfixture for proof datasampler * Sets up calculating number of cells in a slot * Sets up tests for bitwise modulo * Implements cell index collection * setting up slot blocks module * Implements getting treeCID from slot * implements getting slot blocks by index * Implements out-of-range check for slot index * cleanup * Sets up getting sample from block * Implements selecting a cell sample from a block * Implements building a minitree for block cells * Adds method to get dataset block index from slot block index * It's running * splits up indexing * almost there * Fixes test. Implementation is now functional * Refactoring to object-oriented * Cleanup * Lining up output type with updated reference code. * setting up * Updates expected samples * Updates proof checking test to match new format * move builder to own dir * move sampler to own dir * fix paths * various changes to add support for the sampler * wip sampler implementation * don't use upraises * wip sampler integration * misc * move tests around * Various fixes to select correct slot and block index * removing old tests * cleanup * misc fix tests that work with correct cell indices * remove unused file * fixup logging * add logscope * truncate entropy to 31 bytes, otherwise it might be > than mod * forwar getCidAndProof to local store * misc * Adds missing test for initial-proving state * reverting back to correct slot/block indexing * fix tests for revert * misc * misc --------- Co-authored-by: benbierens <thatbenbierens@gmail.com> 2024-01-17 19:24:34 +00:00
			`success ProofInput(`
			`entropy: entropy,`
			`verifyRoot: verifyRoot,`
			`verifyProof: verifyProof,`
			`numSlots: self.builder.numSlots,`
			`numCells: self.builder.numSlotCells,`
			`slotIndex: self.index,`
			`samples: samples)`