implemented backend; correct start of monitoring thread needs to be finished

2024-08-28 16:37:52 -05:00 · 2024-08-28 16:37:52 -05:00 · 9b13facb7a
parent 0cb039d806
commit 9b13facb7a
4 changed files with 138 additions and 1 deletions
--- a/nomos-services/data-availability/sampling/Cargo.toml
+++ b/nomos-services/data-availability/sampling/Cargo.toml
@ -23,6 +23,7 @@ tracing = "0.1"
 thiserror = "1.0.63"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
 chrono = "0.4.38"
 [features]
 default = ["libp2p"]
--- a/nomos-services/data-availability/sampling/src/backend/kzgrs.rs
+++ b/nomos-services/data-availability/sampling/src/backend/kzgrs.rs
@ -1,2 +1,131 @@
 use std::borrow::BorrowMut;
 // std
 use std::collections::{BTreeSet, HashMap};
 use std::fmt::Debug;
 use std::thread;
 use std::time::Duration;
 // crates
 use chrono::{naive::NaiveDateTime, Utc};
 use rand::distributions::Standard;
 use rand::prelude::*;
 use rand_chacha::ChaCha20Rng;
 use kzgrs_backend::common::blob::DaBlob;
 //
 // internal
 use super::DaSamplingServiceBackend;
 use nomos_core::da::BlobId;
 use nomos_da_network_core::SubnetworkId;
 /// Bookkeeping for a single blob whose sampling is in progress.
 pub struct SamplingContext {
    /// Identifier of the blob this context belongs to.
    blob_id: BlobId,
    /// Subnetwork ids recorded for this blob; `handle_sampling_success` appends the
    /// column index of each sampled blob it receives.
    subnets: Vec<SubnetworkId>,
    /// Naive UTC timestamp taken when the context is created in `init_sampling`.
    /// The pending-blob monitor compares it against its cut-off timestamp.
    started: NaiveDateTime,
 }
 /// Configuration for [`KzgrsDaSampler`].
 #[derive(Debug, Clone)]
 pub struct KzgrsDaSamplerSettings {
    /// Number of subnetwork ids drawn per blob in `init_sampling`; also the number of
    /// recorded subnets at which `handle_sampling_success` marks the blob as validated.
    pub num_samples: u16,
    /// How long the pending-blob monitor sleeps between two clean-up passes.
    pub old_blobs_check_duration: Duration,
    /// Age beyond which the pending-blob monitor drops a pending sampling.
    pub blobs_validity_duration: Duration,
 }
 /// Sampling backend state: settings, validated blob ids, in-flight samplings and an RNG.
 pub struct KzgrsDaSampler {
    /// Settings passed to `new`.
    settings: KzgrsDaSamplerSettings,
    /// Blob ids marked as validated; `get_validated_blobs` returns a clone of this set.
    validated_blobs: BTreeSet<BlobId>,
    // TODO: this needs to be properly synchronized if it is going to be accessed
    // by independent threads (e.g. the monitoring thread).
    /// In-flight samplings, keyed by blob id.
    pending_sampling_blobs: HashMap<BlobId, SamplingContext>,
    // TODO: is there a better place for this? Does it need to exist globally?
    // Is there already an existing source of randomness that could be reused?
    /// Random generator (seeded from entropy in `new`) used by `init_sampling`
    /// to draw subnetwork ids.
    rng: ChaCha20Rng,
 }
 impl KzgrsDaSampler {
    /// Runs one clean-up pass over the pending samplings.
    ///
    /// Keeps only the entries whose `started` timestamp is strictly newer than
    /// `now - settings.blobs_validity_duration`; everything else is dropped.
    fn remove_expired_pending_blobs(&mut self) {
        // Everything started at or before the cut timestamp is considered expired.
        let cut_timestamp = Utc::now().naive_utc() - self.settings.blobs_validity_duration;
        self.pending_sampling_blobs
            .retain(|_, ctx| ctx.started > cut_timestamp);
    }

    /// Spawns the background thread that periodically evicts expired pending samplings.
    ///
    /// The thread sleeps for `settings.old_blobs_check_duration`, runs one clean-up pass
    /// and repeats forever. It is detached on purpose: the loop never terminates, so
    /// joining it here would block the calling executor thread indefinitely and this
    /// future would never complete.
    // TODO: this might not be the right signature, as the lifetime of self needs to be
    // evaluated. Taking `&'static mut self` hands exclusive access to the monitor thread
    // for the rest of the program.
    async fn start_pending_blob_monitor(&'static mut self) {
        thread::spawn(move || loop {
            thread::sleep(self.settings.old_blobs_check_duration);
            self.remove_expired_pending_blobs();
        });
    }
 }
 #[async_trait::async_trait]
 impl DaSamplingServiceBackend for KzgrsDaSampler {
    type Settings = KzgrsDaSamplerSettings;
    type BlobId = BlobId;
    type Blob = DaBlob;

    /// Creates a sampler with no validated and no pending blobs and an entropy-seeded RNG.
    fn new(settings: Self::Settings) -> Self {
        // TODO: how to start the actual monitoring thread with the correct ownership/lifetime?
        Self {
            settings,
            validated_blobs: BTreeSet::new(),
            pending_sampling_blobs: HashMap::new(),
            rng: ChaCha20Rng::from_entropy(),
        }
    }

    /// Returns a copy of the set of blob ids that completed sampling.
    async fn get_validated_blobs(&self) -> BTreeSet<Self::BlobId> {
        self.validated_blobs.clone()
    }

    /// Stops tracking the given blob ids, whether they are still pending or already
    /// validated. Unknown ids are ignored.
    async fn mark_in_block(&mut self, blobs_ids: &[Self::BlobId]) {
        for id in blobs_ids {
            // `remove` is a no-op for absent keys, so no prior membership check is needed.
            self.pending_sampling_blobs.remove(id);
            self.validated_blobs.remove(id);
        }
    }

    /// Records one successful sample for `blob_id`.
    ///
    /// The column index of `blob` is appended to the pending context. Once
    /// `settings.num_samples` samples have been recorded, the blob is moved from the
    /// pending map to the validated set.
    ///
    /// A success for a blob id that is not pending (never initialised, already validated,
    /// marked in a block, or evicted by the monitor) is ignored instead of panicking.
    async fn handle_sampling_success(&mut self, blob_id: Self::BlobId, blob: Self::Blob) {
        let sampling_complete = match self.pending_sampling_blobs.get_mut(&blob_id) {
            Some(ctx) => {
                ctx.subnets.push(blob.column_idx as SubnetworkId);
                ctx.subnets.len() >= usize::from(self.settings.num_samples)
            }
            // Late or unsolicited result: there is nothing to update.
            None => return,
        };

        if sampling_complete {
            // Sampling of this blob id terminated successfully: clean up the pending
            // entry first (needs a borrow of the id), then move the id into the set.
            self.pending_sampling_blobs.remove(&blob_id);
            self.validated_blobs.insert(blob_id);
        }
    }

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!`.
    async fn handle_sampling_error(&mut self, _blob_id: Self::BlobId) {
        // TODO: Unimplemented yet because the error handling in the service
        // does not yet receive a blob_id
        unimplemented!("no use case yet")
    }

    /// Starts tracking `blob_id` and returns `settings.num_samples` randomly drawn
    /// subnetwork ids to sample from.
    ///
    /// The pending context starts with an empty `subnets` list; it is filled by
    /// `handle_sampling_success` as samples arrive. Calling this again for an id that is
    /// already pending replaces the previous context (fresh timestamp, no recorded samples).
    async fn init_sampling(&mut self, blob_id: Self::BlobId) -> Vec<SubnetworkId> {
        let subnets: Vec<SubnetworkId> = Standard
            .sample_iter(&mut self.rng)
            .take(usize::from(self.settings.num_samples))
            .collect();

        let ctx = SamplingContext {
            // The id is needed both inside the context and as the map key.
            blob_id: blob_id.clone(),
            subnets: Vec::new(),
            started: Utc::now().naive_utc(),
        };
        self.pending_sampling_blobs.insert(blob_id, ctx);

        subnets
    }
 }
--- a/nomos-services/data-availability/sampling/src/backend/mod.rs
+++ b/nomos-services/data-availability/sampling/src/backend/mod.rs
@ -1,3 +1,5 @@
 pub mod kzgrs;
 // std
 use std::collections::BTreeSet;
@ -14,7 +16,7 @@ pub trait DaSamplingServiceBackend {
    fn new(settings: Self::Settings) -> Self;
    async fn get_validated_blobs(&self) -> BTreeSet<Self::BlobId>;
-    async fn mark_in_block(&mut self, blobs_id: &[Self::BlobId]);
+    async fn mark_in_block(&mut self, blobs_ids: &[Self::BlobId]);
    async fn handle_sampling_success(&mut self, blob_id: Self::BlobId, blob: Self::Blob);
    async fn handle_sampling_error(&mut self, blob_id: Self::BlobId);
    async fn init_sampling(&mut self, blob_id: Self::BlobId) -> Vec<SubnetworkId>;
--- a/nomos-services/data-availability/sampling/src/lib.rs
+++ b/nomos-services/data-availability/sampling/src/lib.rs
@ -115,6 +115,11 @@ where
                sampler.handle_sampling_success(blob_id, *blob).await;
            }
            SamplingEvent::SamplingError { error } => {
                // TODO: in most of these error cases we can't get the blob_id from the error
                // Shouldn't the error contain that?
                // We can of course stop tracking that blob_id in the backend via timeout,
                // which we want to have anyways, but could it be nicer to remove it here too,
                // by calling the handle_sampling_error method?
                error!("Error while sampling: {error}");
            }
        }