add optional row (per node) erasure coding

Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com>
2026-01-08 00:13:07 +00:00 · 2022-06-23 15:26:18 +02:00 · 2022-06-23 15:26:18 +02:00 · f8436c060b
commit f8436c060b
parent ee0674ace0
1 changed files with 14 additions and 9 deletions
--- a/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb
+++ b/analysis/PoR_test_analysis_with_multiple_storage_nodes.ipynb
@ -155,7 +155,9 @@
        "id": "tHlX0Ch8Dafz"
      },
      "source": [
-        "To verify the above functions we set up a small simulation, and evaluate the same probabilities empirically as well."
+        "To verify the above functions, we set up a small simulation and evaluate the same probabilities empirically as well.\n",
        "The simulation also allows for an optional additional layer of erasure coding applied \"horizontally\" along each row, i.e. among the chunks stored on\n",
        "the same node."
      ]
    },
    {
@ -171,16 +173,19 @@
        "  chunks = np.random.rand(N, B)\n",
        "  return chunks > p_chunk_error\n",
        "\n",
-        "def recoverable(chunks, e):\n",
+        "def recoverable(chunks, ecol, erow=0):\n",
-        "  bad = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks\n",
+        "  # ecol: maximum number of recoverable errors in a column\n",
-        "  return (bad < e).all()\n",
+        "  # erow: maximum number of recoverable errors in a row\n",
        "  badcol = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks per column\n",
        "  badrow = chunks.shape[1] - np.count_nonzero(chunks, axis=1)\n",
        "  return not (chunks[badrow > erow][:, badcol > ecol]).any()\n",
        "\n",
-        "def Px_recoverable(p_chunk_error, n, k, tries=1000):\n",
+        "def Px_recoverable(p_chunk_error, n, k, mcol=0, tries=1000, random_chunks=random_chunks):\n",
        "  ## evaluate probability of dataset recoverable empirically\n",
-        "  e = n-k+1 # erased chunks meaning loss\n",
+        "  e = n-k # maximum number of recoverable errors in a column\n",
        "  count = 0 \n",
        "  for i in range(tries):\n",
-        "    count += recoverable(random_chunks(p_chunk_error), e)\n",
+        "    count += recoverable(random_chunks(p_chunk_error), e, mcol)\n",
        "  return count/tries\n",
        "\n",
        "def random_query(chunks, l):\n",
@ -216,7 +221,7 @@
        "\n",
        "def Stats_random_query_on_chunks(p_chunk_error, n, k, l, tries = 1000):\n",
        "  ## TP: recoverable and test for rec. passed\n",
-        "  e = n-k+1 # erased chunks meaning loss\n",
+        "  e = n-k # maximum number of errors in a column\n",
        "  tp, fp, tn, fn = 0, 0, 0, 0 \n",
        "  for i in range(tries):\n",
        "    chunks = random_chunks(p_chunk_error)\n",
@ -230,7 +235,7 @@
        "\n",
        "def Stats_random_multiquery_on_chunks(p_chunk_error, n, k, l, q=1, maxfail=0, tries = 1000):\n",
        "  ## TP: recoverable and test for rec. passed\n",
-        "  e = n-k+1 # erased chunks meaning loss\n",
+        "  e = n-k # maximum number of errors in a column\n",
        "  tp, fp, tn, fn = 0, 0, 0, 0 \n",
        "  for i in range(tries):\n",
        "    chunks = random_chunks(p_chunk_error)\n",