add optional row (per node) erasure coding

Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com>
This commit is contained in:
Csaba Kiraly 2022-06-23 15:26:18 +02:00
parent ee0674ace0
commit f8436c060b
No known key found for this signature in database
GPG Key ID: 0FE274EE8C95166E

View File

@ -155,7 +155,9 @@
"id": "tHlX0Ch8Dafz"
},
"source": [
"To verify the above functions we set up a small simulation, and evaluate the same probabilities empirically as well."
"To verify the above functions we set up a small simulation, and evaluate the same probabilities empirically as well.\n",
"The simulation also supports an optional additional layer of erasure coding applied \"horizontally\" along each row, i.e. among the chunks stored on the\n",
"same node."
]
},
{
@ -171,16 +173,19 @@
" chunks = np.random.rand(N, B)\n",
" return chunks > p_chunk_error\n",
"\n",
"def recoverable(chunks, e):\n",
" bad = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks\n",
" return (bad < e).all()\n",
"def recoverable(chunks, ecol, erow=0):\n",
" # ecol: maximum number of recoverable errors in a column\n",
" # erow: maximum number of recoverable errors in a row\n",
" badcol = chunks.shape[0] - np.count_nonzero(chunks, axis=0) # bad chunks per column\n",
" badrow = chunks.shape[1] - np.count_nonzero(chunks, axis=1)\n",
" return not (chunks[badrow > erow][:, badcol > ecol]).any()\n",
"\n",
"def Px_recoverable(p_chunk_error, n, k, tries=1000):\n",
"def Px_recoverable(p_chunk_error, n, k, mcol=0, tries=1000, random_chunks=random_chunks):\n",
" ## evaluate probability of dataset recoverable empirically\n",
" e = n-k+1 # erased chunks meaning loss\n",
"    e = n-k # maximum number of recoverable errors in a column\n",
" count = 0 \n",
" for i in range(tries):\n",
" count += recoverable(random_chunks(p_chunk_error), e)\n",
" count += recoverable(random_chunks(p_chunk_error), e, mcol)\n",
" return count/tries\n",
"\n",
"def random_query(chunks, l):\n",
@ -216,7 +221,7 @@
"\n",
"def Stats_random_query_on_chunks(p_chunk_error, n, k, l, tries = 1000):\n",
" ## TP: recoverable and test for rec. passed\n",
" e = n-k+1 # erased chunks meaning loss\n",
" e = n-k # maximum number of errors in a column\n",
" tp, fp, tn, fn = 0, 0, 0, 0 \n",
" for i in range(tries):\n",
" chunks = random_chunks(p_chunk_error)\n",
@ -230,7 +235,7 @@
"\n",
"def Stats_random_multiquery_on_chunks(p_chunk_error, n, k, l, q=1, maxfail=0, tries = 1000):\n",
" ## TP: recoverable and test for rec. passed\n",
" e = n-k+1 # erased chunks meaning loss\n",
" e = n-k # maximum number of errors in a column\n",
" tp, fp, tn, fn = 0, 0, 0, 0 \n",
" for i in range(tries):\n",
" chunks = random_chunks(p_chunk_error)\n",