# mirror of https://github.com/logos-storage/das-research.git
# synced 2026-01-14 19:13:08 +00:00
import random
import time

import numpy as np
import pandas as pd
import progressbar

import dht
from utils import getStrFromDelayRange

TOTAL_PERCENTAGE = 100
PERCENTAGE_INTERVALS = 1


class SingleDHTretrievalStudy:
    """Publishes samples to a simulated DHT network through a builder node,
    then measures lookups from random retriever nodes, exporting the provide
    and lookup metrics to CSV files."""

    def __init__(self, csvFolder, imgFolder, jobs, nn, rn, samples,
                 fErrR, sErrR, cDelR, fDelR, sDelR, k, a, b, y, stepsToStop):
        self.csvFolder = csvFolder
        self.imgFolder = imgFolder
        self.jobs = jobs
        self.nn = nn
        self.rn = rn
        self.samples = samples
        self.fastErrorRate = fErrR
        self.slowErrorRate = sErrR
        self.connDelayRange = cDelR
        self.fastDelayRange = fDelR
        self.slowDelayRange = sDelR  # timeouts
        self.k = k
        self.alpha = a
        self.beta = b
        self.gamma = y
        self.stepsToStop = stepsToStop
        # compose the study name used as a suffix for the output files
        s = ""
        s += f"_nn{nn}"
        s += f"_rn{rn}"
        s += f"_sampl{samples}"
        s += f"_fer{fErrR}"
        s += f"_ser{sErrR}"
        s += f"_cdr{getStrFromDelayRange(cDelR)}"
        s += f"_fdr{getStrFromDelayRange(fDelR)}"
        s += f"_sdr{getStrFromDelayRange(sDelR)}"
        s += f"_k{k}"
        s += f"_a{a}"
        s += f"_b{b}"
        s += f"_y{y}"
        s += f"_steps{stepsToStop}"
        self.studyName = s
        print(f"Retrieval Study => {s}")

    def run(self):
        # Init the DHT Network
        testInitTime = time.time()
        network = dht.DHTNetwork(
            0,
            self.fastErrorRate,
            self.slowErrorRate,
            self.connDelayRange,
            self.fastDelayRange,
            self.slowDelayRange,
            self.gamma)
        initStartTime = time.time()
        network.init_with_random_peers(
            self.jobs,
            self.nn,
            self.k,
            self.alpha,
            self.beta,
            self.stepsToStop)
        self.networkInitTime = time.time() - initStartTime
        print(f"network init done in {self.networkInitTime} secs")

        # pick a random node to act as the builder that publishes the samples
        builderNode = network.nodestore.get_node(random.randint(0, self.nn))

        # create and publish `self.samples` samples to the network,
        # tracking the provide-lookup metrics for the CSV export
        ks = []
        nns = []
        stepstostops = []
        fastErrorRate = []
        slowErrorRate = []
        connDelayRange = []
        fastDelayRange = []
        slowDelayRange = []
        alphas = []
        betas = []
        gammas = []
        providers = []
        sampleNames = []
        provideLookupAggrTime = []
        provideAggrTime = []
        provideOperationAggrTime = []
        provideSuccNodes = []
        provideFailedNodes = []
        samples = []

        for i in range(self.samples):
            sampleContent = f"sample {i}"
            summary, _ = builderNode.provide_block_segment(sampleContent)
            samples.append((sampleContent, sampleContent, summary))
            # add metrics for the csv
            ks.append(self.k)
            alphas.append(self.alpha)
            betas.append(self.beta)
            gammas.append(self.gamma)
            nns.append(self.nn)
            stepstostops.append(self.stepsToStop)
            fastErrorRate.append(f"{self.fastErrorRate}")
            slowErrorRate.append(f"{self.slowErrorRate}")
            connDelayRange.append(f"{getStrFromDelayRange(self.connDelayRange)}")
            fastDelayRange.append(f"{getStrFromDelayRange(self.fastDelayRange)}")
            slowDelayRange.append(f"{getStrFromDelayRange(self.slowDelayRange)}")
            providers.append(builderNode.ID)
            sampleNames.append(sampleContent)
            provideLookupAggrTime.append(summary['lookupDelay'])
            provideAggrTime.append(summary['provideDelay'])
            provideOperationAggrTime.append(summary['operationDelay'])
            provideSuccNodes.append(len(summary['succesNodeIDs']))
            provideFailedNodes.append(len(summary['failedNodeIDs']))

        # save the provide data
        df = pd.DataFrame({
            "number_nodes": nns,
            "k": ks,
            "alpha": alphas,
            "beta": betas,
            "gamma": gammas,
            "stop_steps": stepstostops,
            "fast_error_rate": fastErrorRate,
            "slow_error_rate": slowErrorRate,
            "connection_delay_range": connDelayRange,
            "fast_delay_range": fastDelayRange,
            "slow_delay": slowDelayRange,
            "provider": providers,
            "sample": sampleNames,
            "provide_lookup_aggr_time": provideLookupAggrTime,
            "provide_aggr_time": provideAggrTime,
            "provide_operation_aggr_time": provideOperationAggrTime,
            "provide_succ_nodes": provideSuccNodes,
            "provide_fail_nodes": provideFailedNodes,
        })

        df.to_csv(self.csvFolder + f"/retrieval_provide{self.studyName}.csv")
        network.reset_network_metrics()
        del df

        # reset the metric lists for the retrieval phase
        nns = []
        ks = []
        alphas = []
        betas = []
        gammas = []
        stepstostops = []
        fastErrorRate = []
        slowErrorRate = []
        connDelayRange = []
        fastDelayRange = []
        slowDelayRange = []
        retrievers = []
        sampleNames = []
        lookupTimes = []
        lookupAggrDelays = []
        attemptedNodes = []
        finishedConnAttempts = []
        successfulCons = []
        failedCons = []
        valRetrievable = []
        totalDiscNodes = []
        accuracies = []

        bar = progressbar.ProgressBar(
            maxval=self.rn,
            widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
        bar.start()

        for i in range(self.rn):
            # pick a random retriever that is not the builder itself
            retrieverNode = network.nodestore.get_node(random.randint(0, self.nn))
            while retrieverNode.ID == builderNode.ID:
                retrieverNode = network.nodestore.get_node(random.randint(0, self.nn))

            for j in range(self.samples):
                sampleContent = f"sample {j}"
                sh = dht.Hash(sampleContent)
                lstime = time.time()
                closest, val, summary, aggrDelay = retrieverNode.lookup_for_hash(
                    key=sh, trackaccuracy=True, finishwithfirstvalue=True)
                lduration = time.time() - lstime

                if val == sampleContent:
                    valRetrievable.append(1)
                else:
                    valRetrievable.append(0)

                nns.append(self.nn)
                ks.append(self.k)
                alphas.append(self.alpha)
                betas.append(self.beta)
                gammas.append(self.gamma)
                stepstostops.append(self.stepsToStop)
                fastErrorRate.append(f"{self.fastErrorRate}")
                slowErrorRate.append(f"{self.slowErrorRate}")
                connDelayRange.append(f"{getStrFromDelayRange(self.connDelayRange)}")
                fastDelayRange.append(f"{getStrFromDelayRange(self.fastDelayRange)}")
                slowDelayRange.append(f"{getStrFromDelayRange(self.slowDelayRange)}")
                retrievers.append(retrieverNode.ID)
                sampleNames.append(sampleContent)
                lookupTimes.append(lduration)
                lookupAggrDelays.append(aggrDelay)
                finishedConnAttempts.append(summary['connectionFinished'])
                attemptedNodes.append(summary['connectionAttempts'])
                successfulCons.append(summary['successfulCons'])
                failedCons.append(summary['failedCons'])
                totalDiscNodes.append(summary['totalNodes'])
                accuracies.append(summary['accuracy'])

                # clean up the memory
                del sh
                del summary
                del closest

            # update the progress percentage
            bar.update(i + 1)

        bar.finish()

        testDuration = time.time() - testInitTime
        print(f"test done in {testDuration} secs")
        print(f"DHT fast-init jobs:{self.jobs} done in {self.networkInitTime} secs")
        print(f"{self.nn} nodes, k={self.k}, alpha={self.alpha}, {len(lookupTimes)} lookups")
        print(f"mean time per lookup : {np.mean(lookupTimes)}")
        # aggregated delays are tracked in ms, hence the division by 1000
        print(f"mean aggr delay (secs): {np.mean(lookupAggrDelays) / 1000}")
        print(f"mean contacted nodes: {np.mean(attemptedNodes)}")
        print(f"time to make {len(lookupTimes)} lookups: {np.sum(lookupTimes)} secs")
        print()

        # create the pandas DataFrame and export it to CSV
        df = pd.DataFrame({
            "number_nodes": nns,
            "k": ks,
            "alpha": alphas,
            "beta": betas,
            "gamma": gammas,
            "stop_steps": stepstostops,
            "fast_error_rate": fastErrorRate,
            "slow_error_rate": slowErrorRate,
            "connection_delay_range": connDelayRange,
            "fast_delay_range": fastDelayRange,
            "slow_delay": slowDelayRange,
            "retriever": retrievers,
            "sample": sampleNames,
            "lookup_wallclock_time": lookupTimes,
            "lookup_aggregated_delay": lookupAggrDelays,
            "attempted_nodes": attemptedNodes,
            "finished_connection_attempts": finishedConnAttempts,
            "successful_connections": successfulCons,
            "failed_connections": failedCons,
            "total_discovered_nodes": totalDiscNodes,
            "retrievable": valRetrievable,
            "accuracy": accuracies,
        })
        df.to_csv(self.csvFolder + f"/retrieval_lookup{self.studyName}.csv")

        # save the network metrics
        networkMetrics = network.connection_metrics()
        network_df = pd.DataFrame(networkMetrics)
        network_df.to_csv(self.csvFolder + f"/retrieval_lookup_network{self.studyName}.csv")

        del network
        del df
        del network_df
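

# Minimal usage sketch (an illustration, not part of the original module):
# the folder names and parameter values below are assumptions chosen for the
# example, and the delay ranges are assumed to be (min, max) pairs in ms,
# matching what getStrFromDelayRange appears to stringify.
if __name__ == "__main__":
    study = SingleDHTretrievalStudy(
        csvFolder="csv",            # hypothetical output folder (must exist)
        imgFolder="img",            # hypothetical; unused by this class
        jobs=4,                     # workers for the parallel network init
        nn=1000,                    # number of DHT nodes
        rn=10,                      # number of retriever nodes to test
        samples=5,                  # samples to publish and look up
        fErrR=10,                   # fast error rate (assumed %)
        sErrR=0,                    # slow error rate (assumed %)
        cDelR=(50, 100),            # connection delay range (assumed ms)
        fDelR=(50, 100),            # fast delay range (assumed ms)
        sDelR=(1000, 1500),         # slow delay range / timeouts (assumed ms)
        k=20,                       # DHT k parameter (bucket/replication size)
        a=3,                        # alpha parameter
        b=20,                       # beta parameter
        y=0,                        # gamma parameter
        stepsToStop=3)
    study.run()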