diff --git a/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb b/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb index 505a588..8b7608a 100644 --- a/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb +++ b/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb @@ -155,7 +155,9 @@ "id": "tHlX0Ch8Dafz" }, "source": [ - "To verify the above functions we set up a small simulation, and evaluate the same probabilities empirically as well." + "To verify the above functions we set up a small simulation, and evaluate the same probabilities empirically as well.\n", + "In this case we also allow for an optional additional layer of erasure coding \"horizontally\" on the row, i.e. among chunks on the\n", + "same node." ] }, { @@ -171,16 +173,19 @@ " chunks = np.random.rand(N, B)\n", " return chunks > p_chunk_error\n", "\n", - "def recoverable(chunks, e):\n", - " bad = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks\n", - " return (bad < e).all()\n", + "def recoverable(chunks, ecol, erow=0):\n", + " # ecol: maximum number of recoverable errors in a column\n", + " # erow: maximum number of recoverable errors in a row\n", + " badcol = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks per column\n", + " badrow = chunks.shape[1] - np.count_nonzero(chunks, axis=1)\n", + " return not (chunks[badrow > erow][:, badcol > ecol]).any()\n", "\n", - "def Px_recoverable(p_chunk_error, n, k, tries=1000):\n", + "def Px_recoverable(p_chunk_error, n, k, mcol=0, tries=1000, random_chunks=random_chunks):\n", " ## evaluate probability of dataset recoverable empirically\n", - " e = n-k+1 # erased chunks meaning loss\n", + " e = n-k # erased chunks meaning loss\n", " count = 0 \n", " for i in range(tries):\n", - " count += recoverable(random_chunks(p_chunk_error), e)\n", + " count += recoverable(random_chunks(p_chunk_error), e, mcol)\n", " return count/tries\n", "\n", "def random_query(chunks, l):\n", @@ -216,7 +221,7 @@ "\n", "def Stats_random_query_on_chunks(p_chunk_error, n, k, l, tries = 1000):\n", " ## TP: recoverable and test for rec. passed\n", - " e = n-k+1 # erased chunks meaning loss\n", + " e = n-k # maximum number of errors in a column\n", " tp, fp, tn, fn = 0, 0, 0, 0 \n", " for i in range(tries):\n", " chunks = random_chunks(p_chunk_error)\n", @@ -230,7 +235,7 @@ "\n", "def Stats_random_multiquery_on_chunks(p_chunk_error, n, k, l, q=1, maxfail=0, tries = 1000):\n", " ## TP: recoverable and test for rec. passed\n", - " e = n-k+1 # erased chunks meaning loss\n", + " e = n-k # maximum number of errors in a column\n", " tp, fp, tn, fn = 0, 0, 0, 0 \n", " for i in range(tries):\n", " chunks = random_chunks(p_chunk_error)\n",