mirror of
https://github.com/logos-storage/logos-storage-research.git
synced 2026-01-03 22:13:08 +00:00
370 lines
76 KiB
Plaintext
370 lines
76 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "PoR Analysis",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# Verification frequency calculation\n",
|
|
"\n",
|
|
"This code shows how to compute the minimum horizontal and vertical verification frequency needed to achieve a given durability guarantee, expressed as a number of nines (e.g., nine 9s is 99.9999999%).\n"
|
|
],
|
|
"metadata": {
|
|
"id": "IAw0wWuN-1Yh"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import random as random\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"#@markdown Durability guarantee (three 9s (99.9%) to nine 9s (99.9999999%))\n",
|
|
"Durability = 9 #@param {type:\"slider\", min:3, max:9, step:3}\n",
|
|
"#@markdown Erasure Coding parameter K\n",
|
|
"ECK = 60 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Percentage of data a malicious node discards or refuses/avoids to store\n",
|
|
"CheatRatio = 5 #@param {type:\"slider\", min:5, max:95, step:5}\n",
|
|
"#@markdown Number of PoR blocks inside an EC block (See model below)\n",
|
|
"NbPoRinEC = 500 #@param {type:\"slider\", min:100, max:10000, step:100}\n",
|
|
"\n",
|
|
"GuaranteeDurability = {3:99.90, 6: 99.99990, 9: 99.99999990}\n",
|
|
"ExpectedGuarantee = GuaranteeDurability[Durability]\n",
|
|
"\n",
|
|
"hx = []\n",
|
|
"HorizontalProb = []\n",
|
|
"for NbOfHVerif in range(ECK):\n",
|
|
" prob = (1-(((ECK-1)/100)**NbOfHVerif))*100\n",
|
|
" hx.append(NbOfHVerif)\n",
|
|
" HorizontalProb.append(prob)\n",
|
|
" if prob > ExpectedGuarantee:\n",
|
|
" print(\"Probability of detecting catastrophic failures with %d horizontal verifications is %12.12f%% (Over nine 9s)\" % (NbOfHVerif, prob))\n",
|
|
" break\n",
|
|
"\n",
|
|
"vx = [0]\n",
|
|
"VerticalProb = [0]\n",
|
|
"for NbOfVVerif in range(1, NbPoRinEC):\n",
|
|
" prob = (1-((1-(CheatRatio/100))**NbOfVVerif))*100\n",
|
|
" vx.append(NbOfVVerif)\n",
|
|
" VerticalProb.append(prob)\n",
|
|
" if prob > ExpectedGuarantee:\n",
|
|
" print(\"Probability of detecting catastrophic failures with %d vertical verifications %12.12f%% (Over nine 9s)\" % (NbOfVVerif, prob))\n",
|
|
" print(\"That is %2.1f%% of the PoR blocks need to be verified\" % (NbOfVVerif*100/NbPoRinEC))\n",
|
|
" break\n",
|
|
"\n",
|
|
"\n",
|
|
"fig, ax = plt.subplots()\n",
|
|
"plt.grid(axis='x', color='0.9')\n",
|
|
"plt.grid(axis='y', color='0.9')\n",
|
|
"plt.title(\"Number of horizontal verifications to reach %9.9f guarantee on durability\" % ExpectedGuarantee)\n",
|
|
"plt.xlabel(\"Nodes verified from K=%d in the EC\" % ECK)\n",
|
|
"plt.ylabel(\"Guarantee of not having catastrophic failures (%)\")\n",
|
|
"ax.plot(hx, HorizontalProb)\n",
|
|
"#plt.yscale('log')\n",
|
|
"plt.show()\n",
|
|
"plt.clf()\n",
|
|
"\n",
|
|
"\n",
|
|
"fig, ax = plt.subplots()\n",
|
|
"plt.grid(axis='x', color='0.9')\n",
|
|
"plt.grid(axis='y', color='0.9')\n",
|
|
"plt.title(\"Number of vertical verifications to reach %9.9f guarantee on durability\" % ExpectedGuarantee)\n",
|
|
"plt.xlabel(\"Number of PoR blocks verified out of %d\" % NbPoRinEC)\n",
|
|
"plt.ylabel(\"Guarantee of not having catastrophic failures (%)\")\n",
|
|
"ax.plot(vx, VerticalProb)\n",
|
|
"#plt.yscale('log')\n",
|
|
"plt.show()\n",
|
|
"\n"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 1000
|
|
},
|
|
"id": "Y_CIsSJz_Nek",
|
|
"outputId": "1b4b7efb-2990-44b2-dd22-67c257d4ff70"
|
|
},
|
|
"execution_count": 48,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Probability of detecting catastrophic failures with 40 horizontal verifications is 99.999999931753% (Over nine 9s)\n",
|
|
"Probability of detecting catastrophic failures with 405 vertical verifications 99.999999904926% (Over nine 9s)\n",
|
|
"That is 81.0% of the PoR blocks need to be verified\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<Figure size 1080x576 with 1 Axes>"
|
|
],
|
|
"image/png": "\n"
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
}
|
|
},
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<Figure size 1080x576 with 0 Axes>"
|
|
]
|
|
},
|
|
"metadata": {}
|
|
},
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<Figure size 1080x576 with 1 Axes>"
|
|
],
|
|
"image/png": "\n"
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"source": [
|
|
"# PoR Analysis\n",
|
|
"\n",
|
|
"## Friendly toy scenario\n",
|
|
"\n",
|
|
"Feel free to play with these variables as much as you want in order to simulate any extreme scenario you find interesting to explore."
|
|
],
|
|
"metadata": {
|
|
"id": "Bw-GGBdX6tMm"
|
|
}
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 49,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "n-GB9Vsmp4M1",
|
|
"outputId": "a4ab7563-bc61-4169-89e8-7f7432f542a2"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"\n",
|
|
" *** Network Parameters *** \n",
|
|
"\n",
|
|
"The network has 15400 nodes\n",
|
|
"The average node capacity is 1200 GBs\n",
|
|
"The network has 1206000 files\n",
|
|
"The average file size in the network is 9765 MBs\n",
|
|
"The estimated mean time between failures of the network is 12 hours\n",
|
|
"Malicious nodes cheat the system by NOT storing 5% of the data they should store\n",
|
|
"\n",
|
|
" *** Codex Parameters *** \n",
|
|
"\n",
|
|
"The erasure coding parameter K is 60\n",
|
|
"The erasure coding parameter M is 40\n",
|
|
"The lazy repair parameter L is 20\n",
|
|
"The Proof of Retrievability number of sectors per block is 10\n",
|
|
"The PoR agregation cost is 1 ms per PoR block\n",
|
|
"Durability in number of nines: 9\n",
|
|
"\n",
|
|
" *** Dataset and block structures *** \n",
|
|
"\n",
|
|
"There are 1206000 files of size 10000000 KB (9765.6 MB)\n",
|
|
"The total storage used in the network is 11231.8 TB and there is 6815.1 TB of available storage for a total of 18046.9 TB\n",
|
|
"There will be 100 EC blocks of size 162.8 MBs per file\n",
|
|
"There is a total of 120600000 EC blocks in the network\n",
|
|
"There is an average of 7831.2 EC blocks per node (assuming an homogeneous distribution)\n",
|
|
"Each EC block has 550537.6 PoR blocks of size 0.3 KB\n",
|
|
"\n",
|
|
" *** Durability guarantees and verification frequency *** \n",
|
|
"\n",
|
|
"Guarantee of data durability is 99.999999900\n",
|
|
"Probability of detecting catastrophic failures with 40 horizontal verifications is 99.999999931753% (Over 99.999999900)\n",
|
|
"Probability of detecting catastrophic failures with 405 vertical verifications 99.999999904926% (Over 99.999999900)\n",
|
|
"That is 0.1% of the PoR blocks need to be verified\n",
|
|
"The PoR horizontal verification factor is 40.0\n",
|
|
"The PoR vertical verification factor is 7.4%\n",
|
|
"\n",
|
|
" *** PoR Storage and Time Requirements *** \n",
|
|
"\n",
|
|
"The total storage required for PoR proofs is 14261.6 MB to check a total of 11231.8TB of data each round\n",
|
|
"To verify 40 EC blocks and 7.4% PoR blocks per file in the system:\n",
|
|
"Each node will spend 2114.4 minutes (35.2 hours) agregating PoR proofs\n",
|
|
"It will take the system 480 hours (20.0 days) to accidentally lose M nodes\n",
|
|
"Lazy repair should be triggered after 240 hours (10.0 days)\n",
|
|
"Assuming PoR verifications are done once per lazy repair round, PoR verification takes 14.7% of the time\n",
|
|
"There are 36.5 PoR verification rounds in a year accumulating 508.3 GB of storage\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import random as random\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"plt.rcParams['figure.figsize'] = [15, 8]\n",
|
|
"\n",
|
|
"\n",
|
|
"#@markdown #Network parameters. \n",
|
|
"#@markdown These parameters give the characteristic of the network and we (Codex) have no control over them.\n",
|
|
"\n",
|
|
"#@markdown Number of nodes in the network\n",
|
|
"NetworkSize = 15400 #@param {type:\"slider\", min:100, max:20000, step:100}\n",
|
|
"#@markdown Average storage size per node (in GB)\n",
|
|
"NodeCapacity = 1200 #@param {type:\"slider\", min:100, max:10000, step:100}\n",
|
|
"#@markdown Number of files in the system\n",
|
|
"NumberOfFiles = 1206000 #@param {type:\"slider\", min:1000, max:10000000, step:1000}\n",
|
|
"#@markdown File size (in KB)\n",
|
|
"FileSize = 10000000 #@param {type:\"slider\", min:100, max:10000000, step:100}\n",
|
|
"#@markdown Overall network MTBF (in hours)\n",
|
|
"MTBF = 12 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Percentage of data a malicious node discards or refuses/avoids to store\n",
|
|
"CheatRatio = 5 #@param {type:\"slider\", min:5, max:95, step:5}\n",
|
|
"#@markdown Predefined Scenario (Set to 1 to overwrite the above parameters with the values from the Storj network)\n",
|
|
"Scenario = 0 #@param {type:\"slider\", min:0, max:1, step:1}\n",
|
|
"\n",
|
|
"#@markdown ----------------------------------------------------\n",
|
|
"#@markdown ----------------------------------------------------\n",
|
|
"#@markdown ----------------------------------------------------\n",
|
|
"\n",
|
|
"\n",
|
|
"#@markdown #Codex parameters\n",
|
|
"#@markdown The Codex team can tune the following parameters to optimize the system\n",
|
|
"\n",
|
|
"#@markdown Erasure Coding parameter K\n",
|
|
"ECK = 60 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Erasure Coding parameter M\n",
|
|
"ECM = 40 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Lazy repair parameter L (start repair after L erasures)\n",
|
|
"ECL = 20 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Number of PoR sectors (PoR Parameter S)\n",
|
|
"PoRS = 10 #@param {type:\"slider\", min:10, max:1000, step:10}\n",
|
|
"#@markdown PoR block aggregation cost (in milliseconds)\n",
|
|
"AgregationCost = 1 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"#@markdown Durability guarantee (three 9s (99.9%) to nine 9s (99.9999999%))\n",
|
|
"Durability = 9 #@param {type:\"slider\", min:3, max:9, step:3}\n",
|
|
"\n",
|
|
"\n",
|
|
"# #@markdown Number of EC blocks to verify among K+M nodes\n",
|
|
"#HorizontalVerification = 40 #@param {type:\"slider\", min:1, max:100, step:1}\n",
|
|
"##@markdown Percentage of PoR blocks to verify inside an EC block\n",
|
|
"#VerticalVerification = 0.2 #@param {type:\"slider\", min:0.1, max:1, step:0.1}\n",
|
|
"\n",
|
|
"\n",
|
|
"if Scenario == 1:\n",
|
|
" print(\"\\n***This is the Storj scenario***\\n\")\n",
|
|
" NetworkSize = 15400\n",
|
|
" NodeCapacity = 1200\n",
|
|
" NumberOfFiles = 1200000\n",
|
|
" FileSize = 10000000\n",
|
|
"\n",
|
|
"print(\"\\n *** Network Parameters *** \\n\")\n",
|
|
"print(\"The network has %d nodes\" % NetworkSize)\n",
|
|
"print(\"The average node capacity is %d GBs\" % NodeCapacity)\n",
|
|
"print(\"The network has %d files\" % NumberOfFiles)\n",
|
|
"print(\"The average file size in the network is %d MBs\" % (FileSize/1024))\n",
|
|
"print(\"The estimated mean time between failures of the network is %d hours\" % MTBF)\n",
|
|
"print(\"Malicious nodes cheat the system by NOT storing %d%% of the data they should store\" % CheatRatio)\n",
|
|
"\n",
|
|
"\n",
|
|
"print(\"\\n *** Codex Parameters *** \\n\")\n",
|
|
"print(\"The erasure coding parameter K is %d\" % ECK)\n",
|
|
"print(\"The erasure coding parameter M is %d\" % ECM)\n",
|
|
"print(\"The lazy repair parameter L is %d\" % ECL)\n",
|
|
"print(\"The Proof of Retrievability number of sectors per block is %d\" % PoRS)\n",
|
|
"print(\"The PoR agregation cost is %d ms per PoR block\" % AgregationCost)\n",
|
|
"print(\"Durability in number of nines: %d\" % Durability)\n",
|
|
"\n",
|
|
"ECblockSize = FileSize/ECK\n",
|
|
"PoRblockSize = 31*PoRS/1024\n",
|
|
"TotalECBlocks = NumberOfFiles*(ECK+ECM)\n",
|
|
"ECBlocksPerNode = TotalECBlocks/NetworkSize\n",
|
|
"NbPoRinEC = ECblockSize/PoRblockSize\n",
|
|
"\n",
|
|
"print(\"\\n *** Dataset and block structures *** \\n\")\n",
|
|
"print(\"There are %d files of size %d KB (%2.1f MB)\" % (NumberOfFiles, FileSize, FileSize/1024))\n",
|
|
"print(\"The total storage used in the network is %2.1f TB and there is %2.1f TB of available storage for a total of %2.1f TB\" % (TotalUsedCapacity, TotalNetworkCapacity-TotalUsedCapacity, TotalNetworkCapacity))\n",
|
|
"print(\"There will be %d EC blocks of size %2.1f MBs per file\" % (ECK+ECM, ECblockSize/1024))\n",
|
|
"print(\"There is a total of %d EC blocks in the network\" % TotalECBlocks)\n",
|
|
"print(\"There is an average of %2.1f EC blocks per node (assuming an homogeneous distribution)\" % ECBlocksPerNode)\n",
|
|
"print(\"Each EC block has %2.1f PoR blocks of size %2.1f KB\" % (NbPoRinEC, PoRblockSize))\n",
|
|
"\n",
|
|
"\n",
|
|
"print(\"\\n *** Durability guarantees and verification frequency *** \\n\")\n",
|
|
"GuaranteeDurability = {3:99.90, 6: 99.99990, 9: 99.99999990}\n",
|
|
"ExpectedGuarantee = GuaranteeDurability[Durability]\n",
|
|
"print(\"Guarantee of data durability is %9.9f\" % ExpectedGuarantee)\n",
|
|
"\n",
|
|
"for NbOfHVerif in range(ECK):\n",
|
|
" prob = (1-(((ECK-1)/100)**NbOfHVerif))*100\n",
|
|
" if prob > ExpectedGuarantee:\n",
|
|
" print(\"Probability of detecting catastrophic failures with %d horizontal verifications is %12.12f%% (Over %9.9f)\" % (NbOfHVerif, prob, ExpectedGuarantee))\n",
|
|
" HorizontalVerification = NbOfHVerif \n",
|
|
" break\n",
|
|
"\n",
|
|
"for NbOfVVerif in range(1, int(NbPoRinEC)):\n",
|
|
" prob = (1-((1-(CheatRatio/100))**NbOfVVerif))*100\n",
|
|
" if prob > ExpectedGuarantee:\n",
|
|
" print(\"Probability of detecting catastrophic failures with %d vertical verifications %12.12f%% (Over %9.9f)\" % (NbOfVVerif, prob, ExpectedGuarantee))\n",
|
|
" print(\"That is %2.1f%% of the PoR blocks need to be verified\" % (NbOfVVerif*100/NbPoRinEC))\n",
|
|
" VerticalVerification = NbOfVVerif*100/NbPoRinEC\n",
|
|
" break\n",
|
|
"\n",
|
|
"print(\"The PoR horizontal verification factor is %2.1f\" % HorizontalVerification)\n",
|
|
"print(\"The PoR vertical verification factor is %2.1f%%\" % (VerticalVerification*100))\n",
|
|
"\n",
|
|
"\n",
|
|
"TotalPoRStorage = NumberOfFiles*HorizontalVerification*PoRblockSize/1024\n",
|
|
"NodeAgregationTime = NbPoRinEC*VerticalVerification*ECBlocksPerNode*(HorizontalVerification/(ECK+ECM))*AgregationCost/(1000*60)\n",
|
|
"PoRPerYear = 365*24/(ECL*MTBF)\n",
|
|
"TotalUsedCapacity = int(NumberOfFiles*FileSize)/int(1024*1024*1024)\n",
|
|
"TotalNetworkCapacity = NetworkSize*NodeCapacity/1024\n",
|
|
"\n",
|
|
"\n",
|
|
"print(\"\\n *** PoR Storage and Time Requirements *** \\n\")\n",
|
|
"print(\"The total storage required for PoR proofs is %2.1f MB to check a total of %2.1fTB of data each round\" % (TotalPoRStorage, TotalUsedCapacity))\n",
|
|
"print(\"To verify %d EC blocks and %2.1f%% PoR blocks per file in the system:\" % (HorizontalVerification, VerticalVerification*100))\n",
|
|
"print(\"Each node will spend %2.1f minutes (%2.1f hours) agregating PoR proofs\" % (NodeAgregationTime, NodeAgregationTime/60))\n",
|
|
"print(\"It will take the system %d hours (%2.1f days) to accidentally lose M nodes\" % (MTBF*ECM, MTBF*ECM/24))\n",
|
|
"print(\"Lazy repair should be triggered after %d hours (%2.1f days)\" % (MTBF*ECL, MTBF*ECL/24))\n",
|
|
"print(\"Assuming PoR verifications are done once per lazy repair round, PoR verification takes %2.1f%% of the time\" % ((NodeAgregationTime*100)/(MTBF*ECL*60)))\n",
|
|
"print(\"There are %2.1f PoR verification rounds in a year accumulating %2.1f GB of storage\" % (PoRPerYear, PoRPerYear*TotalPoRStorage/1024))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
""
|
|
],
|
|
"metadata": {
|
|
"id": "nDubyEGMnOPE"
|
|
},
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
} |