fixes after review
@@ -13,8 +13,8 @@ C_com = H_weeks + 1
# advertise formula:
# Adv_bandwidth_per_provider = (C/T) * (14460 * log2(N) + 3744) [bytes/sec]
def adv_bandwidth_bytes_per_sec(C, N, T):
    return (C / T) * (14460 * np.log2(N) + 3744)
def adv_bandwidth_kb_per_sec(C, N, T):
    return (C / T) * (14460 * np.log2(N) + 3744) / 1024

# plot
fig, ax = plt.subplots(figsize=(10, 6))
@@ -30,15 +30,15 @@ colors = ["C0", "C1", "C2", "C3"]
for idx, (N, N_label) in enumerate(zip(N_values, [r"N = 10,000", r"N = 100,000"])):
    for j, (T, T_lab) in enumerate(zip(T_values, T_labels)):
        label = f"{N_label}, {T_lab}"
        y = adv_bandwidth_bytes_per_sec(C_com, N, T)
        y = adv_bandwidth_kb_per_sec(C_com, N, T)
        ax.plot(H_weeks, y, lw=2, label=label, color=colors[idx*2 + j])

ax.set_xlabel("Community age H (weeks)")
ax.set_ylabel("Advertise Bandwidth (bytes/sec)")
ax.set_ylabel("Advertise Bandwidth (KB/sec)")
ax.set_title("Advertise Bandwidth vs Community Age (per provider/member)", fontsize=14)
ax.grid(True, linestyle="--", alpha=0.5)
ax.legend()
ax.set_yscale("log", base=2)
ax.set_xscale("log", base=2)
ax.set_yscale("log", base=10)
ax.set_xscale("log", base=10)
fig.tight_layout()
plt.show()
@@ -33,8 +33,8 @@ for i, N in enumerate(N_list):
    ax.set_ylabel("Advertise Bandwidth (KB/sec)")
    ax.grid(True, linestyle="--", alpha=0.5)
    ax.legend()
    ax.set_yscale("log", base=2)
    ax.set_xscale("log", base=2)
    ax.set_yscale("log", base=10)
    ax.set_xscale("log", base=10)
plt.suptitle("Advertise Bandwidth vs Time (per provider) for various (N, T)")
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()
Before Width: | Height: | Size: 188 KiB After Width: | Height: | Size: 188 KiB |
Before Width: | Height: | Size: 78 KiB After Width: | Height: | Size: 77 KiB |
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 47 KiB |
@@ -4,14 +4,14 @@ import matplotlib.pyplot as plt
# Range of N values
N = np.linspace(1e4, 1e5, 500)

# Maintenance bandwidth formula
bandwidth_maint = 6.67 + 48.2 * np.log2(N)
# Maintenance bandwidth formula (converted to KB/sec)
bandwidth_maint = (6.67 + 48.2 * np.log2(N)) / 1024

# Plot
plt.figure(figsize=(8,5))
plt.plot(N, bandwidth_maint, label=r'Maintenance Bandwidth')
plt.xlabel('N (number of DHT nodes)')
plt.ylabel('Maintenance Bandwidth (bytes/sec)')
plt.ylabel('Maintenance Bandwidth (KB/sec)')
plt.title('Maintenance Cost For Varying Number of Nodes')
plt.grid(True, linestyle="--", alpha=0.6)
plt.legend()

@@ -7,15 +7,15 @@ N = np.linspace(1e4, 1e5, 500)
# Maintenance bandwidth per second (bytes/s)
bandwidth_per_sec = 6.67 + 48.2 * np.log2(N)

# Convert to MB per day
bandwidth_per_day_MB = bandwidth_per_sec * 86400 / 1e6
# Convert to KB per day
bandwidth_per_day_KB = bandwidth_per_sec * 86400 / 1e3

# Plot
plt.figure(figsize=(8,5))
plt.plot(N, bandwidth_per_day_MB, color='navy', label=r'Maintenance Bandwidth (mb/day)')
plt.plot(N, bandwidth_per_day_KB, color='navy', label=r'Maintenance Bandwidth (KB/day)')
plt.xlabel('N (number of DHT nodes)')
plt.ylabel('Maintenance Bandwidth (mb/day)')
plt.title('Maintenance Cost For Varying Number of Nodes (per day, mb)')
plt.ylabel('Maintenance Bandwidth (KB/day)')
plt.title('Maintenance Cost For Varying Number of Nodes (per day, KB)')
plt.grid(True, linestyle="--", alpha=0.6)
plt.legend()
plt.show()
@@ -11,7 +11,6 @@ P_buckets = {
    "large (P=1000)": 1000
}

# Query: (bytes per query)
def query_payload_bytes(N, P):
    return 14460.0 * np.log2(N) + 16.0 * (33.0 + 305.0 * P)

@@ -20,10 +19,12 @@ def Q_per_content(C, P):
    W = 7 * 86400.0
    return (P / W) * (0.05 + 0.20 / C)

# Query bandwidth BW_query(C) = C * Q(C) * query_payload_bytes
def BW_query_bytes_per_sec(C, P, N):
# Query bandwidth in KB/sec
def BW_query_kb_per_sec(C, P, N):
    Q = Q_per_content(C, P)
    return C * Q * query_payload_bytes(N, P)
    capped_P = min(P, 100)
    # (C * Q * (14460 * np.log2(N) + 528 + 16 * (np.ceil(P/5) * 5 + 300*P))) / (N * 1024)
    return (C * Q * (14460 * np.log2(N) + 528 + 16 * (np.ceil(capped_P/5) * 5 + 300*capped_P))) / (N * 1024)

# Community age (H_weeks) and content counts (C)
H_weeks = np.arange(1, 513)
@@ -36,12 +37,12 @@ axs = axs.flatten()
for i, N in enumerate(N_values):
    ax = axs[i]
    for label, P in P_buckets.items():
        y = BW_query_bytes_per_sec(C_vals, P, N)
        y = BW_query_kb_per_sec(C_vals, P, N)
        ax.plot(H_weeks, y, label=label, linewidth=2)
    ax.set_xscale("log", base=2)
    ax.set_yscale("log", base=2)
    ax.set_xscale("log", base=10)
    ax.set_yscale("log", base=10)
    ax.set_xlabel("Community age H (weeks)")
    ax.set_ylabel("Query Bandwidth (bytes/sec)")
    ax.set_ylabel("Query Bandwidth (KB/sec)")
    ax.set_title(f"N={int(N):,}")
    ax.grid(True, linestyle="--", alpha=0.5)
    ax.legend()

Before Width: | Height: | Size: 65 KiB After Width: | Height: | Size: 67 KiB |
@@ -31,9 +31,8 @@ def Q_per_content(C, P):


def fmt(x):
    # format bytes/sec in a nicer way
    # format KB/sec in a nicer way
    if x == 0: return "0"
    if x >= 1e9: return f"{x/1e9:.3f}e9"
    if x >= 1e6: return f"{x/1e6:.3f}e6"
    if x >= 1e3: return f"{x/1e3:.3f}e3"
    return f"{x:.3f}"
@@ -47,23 +46,23 @@ for N in N_values:
    lines.append(f"\n### N = {int(N):,}, T = {int(T/3600) if T>=3600 else T//60} {'h' if T>=3600 else 'min'}\n")
    for bucket, P in P_buckets.items():
        lines.append(f"\n**{bucket}**\n")
        lines.append("| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |\n")
        lines.append("|---:|---:|---:|---:|---:|---:|\n")
        M = maint_bytes_per_sec(N) # constant over H in a given (N)
        Adv_payload = advert_payload_bytes(N)
        lines.append("| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |\n")
        lines.append("|---:|---:|---:|---:|---:|\n")
        M = maint_bytes_per_sec(N) / 1024.0 # constant over H in a given (N)
        Adv_payload = advert_payload_bytes(N) / 1024.0
        for H in H_samples:
            C = H + 1
            # traffic categories:
            maint = M
            advert = (C / T) * Adv_payload
            Qc = Q_per_content(C, P)
            query = C * Qc * query_payload_bytes(N, p_msg=P)
            query = (C * Qc * (14460 * math.log2(N) + 528 + 16 * (math.ceil(min(P,100)/5) * 5 + 300 * min(P,100)))) / N / 1024.0
            total = maint + advert + query
            # percentages:
            maint_pct = (maint / total) * 100.0 if total > 0 else 0.0
            advert_pct = (advert / total) * 100.0 if total > 0 else 0.0
            query_pct = (query / total) * 100.0 if total > 0 else 0.0
            lines.append(f"| {H:>3d} | {C:>3d} | {fmt(total)} | {maint_pct:6.2f}% | {advert_pct:6.2f}% | {query_pct:6.2f}% |\n")
            lines.append(f"| {C:>3d} | {fmt(total)} | {maint_pct:6.2f}% | {advert_pct:6.2f}% | {query_pct:6.2f}% |\n")

with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(lines)

@@ -2,115 +2,115 @@
### N = 10,000, T = 30 min

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 3.592e3 | 18.02% | 81.81% | 0.17% |
| 52 | 53 | 6.426e3 | 10.07% | 89.75% | 0.18% |
| 104 | 105 | 12.095e3 | 5.35% | 94.47% | 0.18% |
| 208 | 209 | 23.434e3 | 2.76% | 97.06% | 0.18% |
| 416 | 417 | 46.111e3 | 1.40% | 98.41% | 0.18% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.501 | 18.05% | 81.95% | 0.00% |
| 53 | 6.264 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.00% |
| 209 | 22.843 | 2.77% | 97.23% | 0.00% |
| 417 | 44.948 | 1.41% | 98.59% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 4.491e3 | 14.41% | 65.43% | 20.16% |
| 52 | 53 | 8.079e3 | 8.01% | 71.39% | 20.60% |
| 104 | 105 | 15.256e3 | 4.24% | 74.90% | 20.86% |
| 208 | 209 | 29.610e3 | 2.19% | 76.81% | 21.00% |
| 416 | 417 | 58.319e3 | 1.11% | 77.81% | 21.08% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.501 | 18.05% | 81.95% | 0.00% |
| 53 | 6.265 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.00% |
| 209 | 22.844 | 2.77% | 97.23% | 0.00% |
| 417 | 44.949 | 1.41% | 98.59% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 16.586e3 | 3.90% | 17.72% | 78.38% |
| 52 | 53 | 30.319e3 | 2.13% | 19.02% | 78.84% |
| 104 | 105 | 57.785e3 | 1.12% | 19.77% | 79.11% |
| 208 | 209 | 112.717e3 | 0.57% | 20.18% | 79.25% |
| 416 | 417 | 222.581e3 | 0.29% | 20.39% | 79.32% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.502 | 18.05% | 81.95% | 0.00% |
| 53 | 6.265 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.01% |
| 209 | 22.844 | 2.77% | 97.23% | 0.01% |
| 417 | 44.951 | 1.41% | 98.59% | 0.01% |

### N = 10,000, T = 22 h

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 720.105 | 89.87% | 9.27% | 0.86% |
| 52 | 53 | 789.601 | 81.96% | 16.60% | 1.44% |
| 104 | 105 | 928.592 | 69.69% | 27.97% | 2.34% |
| 208 | 209 | 1.207e3 | 53.63% | 42.84% | 3.52% |
| 416 | 417 | 1.763e3 | 36.72% | 58.52% | 4.77% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.65% | 9.35% | 0.00% |
| 53 | 0.760 | 83.16% | 16.84% | 0.00% |
| 105 | 0.886 | 71.36% | 28.64% | 0.00% |
| 209 | 1.137 | 55.59% | 44.41% | 0.00% |
| 417 | 1.639 | 38.55% | 61.45% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 1.619e3 | 39.97% | 4.12% | 55.90% |
| 52 | 53 | 2.442e3 | 26.50% | 5.37% | 68.14% |
| 104 | 105 | 4.089e3 | 15.83% | 6.35% | 77.82% |
| 208 | 209 | 7.383e3 | 8.77% | 7.00% | 84.23% |
| 416 | 417 | 13.970e3 | 4.63% | 7.38% | 87.99% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.64% | 9.35% | 0.01% |
| 53 | 0.760 | 83.15% | 16.84% | 0.01% |
| 105 | 0.886 | 71.35% | 28.63% | 0.02% |
| 209 | 1.137 | 55.58% | 44.40% | 0.03% |
| 417 | 1.640 | 38.54% | 61.42% | 0.03% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 13.714e3 | 4.72% | 0.49% | 94.79% |
| 52 | 53 | 24.682e3 | 2.62% | 0.53% | 96.85% |
| 104 | 105 | 46.618e3 | 1.39% | 0.56% | 98.05% |
| 208 | 209 | 90.489e3 | 0.72% | 0.57% | 98.71% |
| 416 | 417 | 178.232e3 | 0.36% | 0.58% | 99.06% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.62% | 9.35% | 0.02% |
| 53 | 0.760 | 83.12% | 16.84% | 0.04% |
| 105 | 0.886 | 71.31% | 28.62% | 0.07% |
| 209 | 1.138 | 55.54% | 44.36% | 0.10% |
| 417 | 1.641 | 38.50% | 61.36% | 0.14% |

### N = 100,000, T = 30 min

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 4.473e3 | 18.05% | 81.79% | 0.17% |
| 52 | 53 | 8.003e3 | 10.09% | 89.74% | 0.17% |
| 104 | 105 | 15.062e3 | 5.36% | 94.47% | 0.17% |
| 208 | 209 | 29.180e3 | 2.77% | 97.06% | 0.17% |
| 416 | 417 | 57.416e3 | 1.41% | 98.42% | 0.18% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.683 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 5.402e3 | 14.94% | 67.73% | 17.32% |
| 52 | 53 | 9.710e3 | 8.31% | 73.96% | 17.72% |
| 104 | 105 | 18.327e3 | 4.40% | 77.64% | 17.96% |
| 208 | 209 | 35.559e3 | 2.27% | 79.65% | 18.08% |
| 416 | 417 | 70.025e3 | 1.15% | 80.70% | 18.15% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.683 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 17.590e3 | 4.59% | 20.80% | 74.61% |
| 52 | 53 | 32.120e3 | 2.51% | 22.36% | 75.13% |
| 104 | 105 | 61.180e3 | 1.32% | 23.26% | 75.42% |
| 208 | 209 | 119.300e3 | 0.68% | 23.74% | 75.58% |
| 416 | 417 | 235.541e3 | 0.34% | 23.99% | 75.67% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.684 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |

### N = 100,000, T = 22 h

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 897.828 | 89.91% | 9.26% | 0.83% |
| 52 | 53 | 984.126 | 82.03% | 16.59% | 1.39% |
| 104 | 105 | 1.157e3 | 69.79% | 27.96% | 2.26% |
| 208 | 209 | 1.502e3 | 53.75% | 42.86% | 3.39% |
| 416 | 417 | 2.192e3 | 36.82% | 58.58% | 4.60% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.40% | 28.60% | 0.00% |
| 209 | 1.417 | 55.64% | 44.36% | 0.00% |
| 417 | 2.043 | 38.60% | 61.40% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 1.826e3 | 44.20% | 4.55% | 51.25% |
| 52 | 53 | 2.691e3 | 29.99% | 6.07% | 63.94% |
| 104 | 105 | 4.421e3 | 18.26% | 7.31% | 74.43% |
| 208 | 209 | 7.881e3 | 10.24% | 8.17% | 81.59% |
| 416 | 417 | 14.801e3 | 5.45% | 8.68% | 85.87% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.40% | 28.60% | 0.00% |
| 209 | 1.417 | 55.64% | 44.36% | 0.00% |
| 417 | 2.043 | 38.60% | 61.40% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 14.014e3 | 5.76% | 0.59% | 93.65% |
| 52 | 53 | 25.101e3 | 3.22% | 0.65% | 96.13% |
| 104 | 105 | 47.275e3 | 1.71% | 0.68% | 97.61% |
| 208 | 209 | 91.622e3 | 0.88% | 0.70% | 98.42% |
| 416 | 417 | 180.317e3 | 0.45% | 0.71% | 98.84% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.39% | 28.60% | 0.01% |
| 209 | 1.417 | 55.63% | 44.36% | 0.01% |
| 417 | 2.043 | 38.59% | 61.40% | 0.01% |

@@ -38,18 +38,18 @@ def Q_per_content(C, P):

def total_bw_bytes_per_sec(N, T, C, P):
    """
    Total bandwidth per provider (bytes/sec) as a function of N, T, C, P:
    Total bandwidth per provider (KB/sec) as a function of N, T, C, P:
      BW_total = maintenance + advertise + query
    where:
      maintenance = (6.67 + 48.2*log2(N))
      advertise = (C/T) * [14460*log2(N) + 3744]
      query = C * Q(C) * [14460*log2(N) + 16*(33 + 305*P)]
      query = (C * Q(C) * (14460*log2(N) + 528 + 16*(ceil(min(P,100)/5)*5 + 300*min(P,100)))) / N
    """
    maint = maint_bytes_per_sec(N)
    advert = (C / T) * advert_payload_bytes(N)
    Qc = Q_per_content(C, P)
    query = C * Qc * query_payload_bytes(N, P)
    return maint + advert + query
    query = (C * Qc * (14460 * np.log2(N) + 528 + 16 * (np.ceil(min(P,100)/5) * 5 + 300*min(P,100)))) / N
    return (maint + advert + query) / 1024

# Plotting 2x2 grid (N x T), 3 curves per subplot (P buckets)
fig, axs = plt.subplots(2, 2, figsize=(14, 10), sharex=True, sharey=True)
@@ -63,11 +63,11 @@ for i, N in enumerate(N_values):
        y = total_bw_bytes_per_sec(N, T, C_vals, P)
        ax.plot(H_weeks, y, color=colors[k % len(colors)], lw=2, label=label)
    ax.set_xlabel("Community age H (weeks)")
    ax.set_ylabel("Total Bandwidth (bytes/sec)")
    ax.set_ylabel("Total Bandwidth (KB/sec)")
    ax.set_title(f"N={int(N):,}, T={int(T/3600) if T>=3600 else T//60} {'h' if T>=3600 else 'min'}")
    ax.grid(True, linestyle="--", alpha=0.5)
    ax.set_xscale("log", base=2)
    ax.set_yscale("log", base=2)
    ax.set_xscale("log", base=10)
    ax.set_yscale("log", base=10)
    ax.legend()

plt.tight_layout()

Before Width: | Height: | Size: 171 KiB After Width: | Height: | Size: 106 KiB |
@@ -3,25 +3,26 @@
Description:
> We need to have a clear understanding of what the upper bounds of our proposed solution to replace bittorrent in status-go will be. In particular, replacing a single message archive torrent (index + ever-growing data) with an index CID and a CID for each message archive dataset will cause DHT strain. Every 7 days, a new archive and updated index will be uploaded, and replicated to all members of the swarm. What is the upper bound of community members and average number of communities per node that can be supported by the DHT for such a system?

Some more Context: [Guiliano's write-up](https://hackmd.io/tw3oYZ10S8Kg1EoFJdRLpw)
Some more Context: [Giuliano's write-up](https://hackmd.io/tw3oYZ10S8Kg1EoFJdRLpw)

This boils down to answering/measuring two things:
- **What is the upper bound of community members?** meaning, how many members can a community contain without straining the DHT.
- **What is the average number of communities per-node?** meaning, given that we expect each community to contain x number of members, how many communities can a node be part of?


So how to go about analyzing this? We can follow these steps:
- Start with understanding the DHT traffic first, [Chrys's write-up](https://hackmd.io/tzafQHRgRfKcKAfQ-xBm3Q) categorizes these so we can start there.
- The parameters or variables involved in each of these traffic categories, and the formulas to estimate traffic (for each category) based on these params/variables.
- Based on our use-case (the status community archive) we can select which traffic is mostly expected, what ranges the variables lie in, and measurements of msg sizes (to estimate bandwidth).
- Run some calculations (on expected variable ranges) to estimate the expected bandwidth per-provider (community member), and create figures/plots to show how it scales with an increasing number of contents (archive CIDs).
- Based on these numbers, we can then estimate the lower/upper bounds of the number of communities and number of members in each of these communities. This ofcourse would depend what is an acceptable bandwidth that a node can handle.
- Based on these numbers, we can then estimate the lower/upper bounds of the number of communities and number of members in each of these communities. This of course would depend on what is an acceptable bandwidth that a node can handle.

## DHT Traffic
Let's look at the source of traffic in DHTs. I will use some of the terms and symbols from [Chrys's write-up](https://hackmd.io/tzafQHRgRfKcKAfQ-xBm3Q) for consistency. I'm restating it here with more elaboration, mainly for my own understanding.

**Maintenance Traffic**
The traffic overhead to keep the overlay network healthy.
- Generated by: All DHT participants regardless of whether they store or query content.
- Generated by: All DHT participants regardless of whether they store or query contents.
- Purpose: maintain the DHT network topology - each node periodically:
  - Pings its neighbors (keep-alives).
  - Refreshes its routing table.
@@ -41,7 +42,7 @@ The traffic generated when nodes publish “I can provide content X”. This is
- TTL/Refresh frequency `T`
- churn rate of providers `λ`.

This is the major cause of traffic in our use-case and is proportional to the number of members in the community and grows endlessly with time as more archive files are created.
This is the major cause of traffic in our use-case; it is proportional to the number of members in the community and grows endlessly with time as more archive files are created.

**Query Traffic**
The traffic from nodes asking “Who has content X?”; in Codex this is `getProviders(nodeId/CID)`
@@ -68,7 +69,7 @@ Let's look at the variables involved in our use-case. Recall that the archive is

If we want to measure the DHT strain/traffic per-community we can split `C` into:
$C_{com} = H + 1$: number of archive files per-community, where `H` is community age in weeks.
$C_{all} = \sum_0^k (H + 1)$: number of archive files for all `k` communities a node is part of, where `H` is community age in weeks.
$C_{all} = \sum_{i=0}^k (H_i + 1)$: number of archive files for all `k` communities a node is part of, where `H_i` is the age of community `i` in weeks.

Note: we add 1 above for the index file.

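To make the bookkeeping concrete, here is a minimal sketch (the community ages are made-up example values, not measurements):

```python
# Minimal sketch of the content counts defined above.
def c_com(H_weeks):
    # weekly archive files plus 1 index file
    return H_weeks + 1

# a node that is a member of k = 3 communities with hypothetical ages (in weeks):
ages = [52, 104, 26]
C_all = sum(c_com(H) for H in ages)  # = 53 + 105 + 27 = 185
```
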
@@ -125,7 +126,7 @@ Bandwidth_{maintenance} \text{ (per-node)} =
$$

For **Content/advertize traffic**:
Every provider node that stores a content key (CID) must (re-)advertize it every time-interval `T`. Each advertize has a cost, let's use the term $C_{adv}$ for cost-per-advertize (in RPC). The cost to advertise a single content is two parts: (1) find the `K` closest nodes to the CID in the DHT keyspace, (2) send `addProvider` msgs to these `K` nodes. This is summarized in the following formula:
Every provider node that stores a content key (CID) must (re-)advertize it every time-interval `T`. Each advertize has a cost, let's use the term $C_{adv}$ for cost-per-advertize (in RPC). The cost to advertize a single content is two parts: (1) find the `K` closest nodes to the CID in the DHT keyspace, (2) send `addProvider` msgs to these `K` nodes. This is summarized in the following formula:

$$
RPC_{adv} \text{ (per-content, per-provider)} = C_{lookup} + K
@@ -144,7 +145,7 @@
Bandwidth_{adv}^{\text{ (provider)}} = C \times Bandwidth_{adv}^{\text{ (content,provider)}} \text{ (bytes/sec)}
$$

Again in the above formulas I'm ignoring churn rate and assuming a constant number of providers `P`
Again in the above formulas I'm ignoring churn rate and assuming constant number of providers `P`


For **query traffic**:
@@ -158,17 +159,17 @@

To measure bandwidth we need msg sizes again so let's introduce new symbols:
- $S_{GP}$ : GetProvider, a request to get the providers for a content key, measured = 33 bytes.
- $S_{PM}$ : ProvidersMessage, reponse containing a list of `P` providers - approx equals to `5 + P*300` bytes. Note here that `P` is 5 max but if more than 5 then multiple msgs are sent, for simplicity we can just assume `P*305`.
- $S_{PM}$ : ProvidersMessage, response containing a list of `P` providers - approximately `5 + P*300` bytes. Note here that `P` is 5 max per msg, but if there are more than 5 then multiple msgs are sent; for simplicity we will also assume that the response will contain a max of 100 random providers `P`. With this in mind, we can then say $S_{PM} = \lceil P/5 \rceil * 5 + P*300$

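A tiny helper makes that capped-response assumption explicit (a sketch; the function name is mine):

```python
import math

# S_PM under the assumption above: at most 5 providers per msg,
# and the response capped at 100 providers.
def providers_message_bytes(P):
    p = min(P, 100)
    return math.ceil(p / 5) * 5 + 300 * p

# providers_message_bytes(10)   -> 3010 bytes
# providers_message_bytes(1000) -> 30100 bytes (cap kicks in at P=100)
```
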
The per-content bandwidth (bytes/sec) formula is
$$
Bandwidth_{query}^{\text{content}} = Q \times C_{lookup} \times (S_{find} + S_{nodes}) + K \times (S_{GP} + S_{PM}) \text{ (bytes/sec)}
Bandwidth_{query}^{\text{content}} = Q \times \Big(C_{lookup} \times (S_{find} + S_{nodes}) + K \times (S_{GP} + S_{PM})\Big) \text{ (bytes/sec)}
$$

Then we can estimate the per-node bandwidth using:
$$
\text{Bandwidth}_{query} =
C \times Bandwidth_{query}^{\text{content}}
\text{Bandwidth}_{query} = \frac {
C \times Bandwidth_{query}^{\text{content}}}{N}
$$

Note that the above formula calculates the bandwidth per-node/provider given the query rate `Q`, and this rate is assumed to be equal for all providers. In practice there is no such "symmetry", i.e. depending on which communities you are part of, this rate might be high or low. This depends on a few things:
@@ -188,7 +189,7 @@
\text{Bandwidth}_{\text{total}}
&= \frac{S_{\text{ping}} + S_{\text{pong}}}{\tau_{\text{keepalive}}} + \frac{C_{\text{lookup}}\,(S_{\text{find}}+S_{\text{nodes}})}{\tau_{\text{refresh}}}
\\ &\quad + \frac{C}{T}\Big(C_{\text{lookup}}\,(S_{\text{find}}+S_{\text{nodes}}) + K\,S_{\text{AP}}\Big)
\\ &\quad + C\,Q \Big(C_{\text{lookup}}\,(S_{\text{find}}+S_{\text{nodes}}) + K\,(S_{\text{GP}}+S_{\text{PM}})\Big)
\\ &\quad + \frac{ C\,Q \Big(C_{\text{lookup}}\,(S_{\text{find}}+S_{\text{nodes}}) + K\,(S_{\text{GP}}+S_{\text{PM}})\Big)}{N}
\end{aligned}
$$

@@ -200,7 +201,7 @@
&= \frac{S_{\text{ping}} + S_{\text{pong}}}{\tau_{\text{keepalive}}} \\
&\quad + \frac{\alpha \log_{2}(N)\,(S_{\text{find}} + S_{\text{nodes}})}{\tau_{\text{refresh}}} \\
&\quad + \frac{C}{T}\Big(\alpha \log_{2}(N)\,(S_{\text{find}} + S_{\text{nodes}}) + K\,S_{\text{AP}}\Big) \\
&\quad + C\,Q \Big(\alpha \log_{2}(N)\,(S_{\text{find}} + S_{\text{nodes}}) + K\,(S_{\text{GP}} + S_{\text{PM}})\Big)
&\quad + \frac{ C\,Q \Big(\alpha \log_{2}(N)\,(S_{\text{find}} + S_{\text{nodes}}) + K\,(S_{\text{GP}} + S_{\text{PM}})\Big)}{N}
\end{aligned}
$$

@@ -219,7 +220,7 @@ In the formula for total bandwidth, we can plug-in our measured values for msg s
| $S_{nodes}$ | NODES response (max) | ≈ 4800 bytes |
| $S_{AP}$ | AddProvider request size | 234 bytes |
| $S_{GP}$ | GetProviders request size | 33 bytes |
| $S_{PM}$ | ProvidersMessage response size | 305*P bytes (P = providers)|
| $S_{PM}$ | ProvidersMessage response size | $S_{PM} = \lceil P/5 \rceil * 5 + P*300$ bytes (P = providers with max 100)|

Plugging these numbers into the formula gives us:

@@ -229,7 +230,7 @@
&= \frac{15 + 35}{7.5} \\
&\quad + \frac{3 \log_{2}(N)\,(20 + 4800)}{300} \\
&\quad + \frac{C}{T}\Big(3 \log_{2}(N)\,(20 + 4800) + 16 \cdot 234\Big) \\
&\quad + C\,Q \Big(3 \log_{2}(N)\,(20 + 4800) + 16\,(33 + 305P)\Big)
&\quad + \frac{ C\,Q \Big(3 \log_{2}(N)\,(20 + 4800) + 16\,(33 + \lceil P/5 \rceil * 5 + 300P)\Big)}{N}
\end{aligned}
$$

@@ -240,7 +241,7 @@
&= 6.67 \\
&\quad + 48.2 \,\log_{2}(N) \\
&\quad + \frac{C}{T}\,\big(14460 \,\log_{2}(N) + 3744\big) \\
&\quad + C\,Q\,\big(14460 \,\log_{2}(N) + 528 + 4880P\big)
&\quad + \frac{ C\,Q\,\big(14460 \,\log_{2}(N) + 528 + 16\,(\lceil P/5 \rceil * 5 + 300P)\big)}{N}
\end{aligned}
$$

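For reference, a compact, self-contained sketch of this simplified formula (function and variable names are mine; `P` is capped at 100 in the query term per the $S_{PM}$ assumption, and `Q` uses the join/churn assumptions discussed in the next section):

```python
import math

def total_bandwidth_kbps(N, T, C, P):
    """Per-provider bandwidth (KB/sec) from the simplified formula above."""
    maint = 6.67 + 48.2 * math.log2(N)
    advertise = (C / T) * (14460 * math.log2(N) + 3744)
    p = min(P, 100)                                    # S_PM response cap
    Q = (P / (7 * 86400)) * (0.05 + 0.20 / C)          # assumed join/churn query rate
    query = C * Q * (14460 * math.log2(N) + 528 + 16 * (math.ceil(p / 5) * 5 + 300 * p)) / N
    return (maint + advertise + query) / 1024

# e.g. total_bandwidth_kbps(10_000, 1800, 53, 1000) ≈ 6.3 KB/s,
# matching the (N=10,000, T=30 min, P=1000, C=53) row in the breakdown tables below.
```
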
@@ -249,13 +250,13 @@ Now let's talk about expected ranges for each of the variables above ($N,P,C,T,Q
`N` the number of nodes in the DHT, in our use-case this would be all status (desktop?) nodes, i.e. all members of all communities.
This could range from 10K - 100K nodes depending on how popular status is, we can maybe get some numbers?

`P` the number of providers per-content. This depends on the size of the community, i.e. the larger the community, the mode providers would be because we assume all community members replicate the data. We can estimate this to be in the range 10-1000 members, but we can also assume that about %50-%70 of them are online at any one time, so `P` would be about half of the expected number of members and then we can also ignore churn rate, and expect constant number of providers. Note here that since all members are providers, it means for large communities, the list providers for content key (CID) is long, so the response to `getProviders` would be a big list. Although if there is a limit to how many providers is in the response then this won't be a bandwidth issue, maybe a storage issue (for storing that long list)!?
`P` the number of providers per-content. This depends on the size of the community, i.e. the larger the community, the more providers there would be, because we assume all community members replicate the data. We can estimate this to be in the range 10-1000 members, but we can also assume that about 50%-70% of them are online at any one time, so `P` would be about half of the expected number of members; we can also ignore churn rate and expect a constant number of providers. Note here that since all members are providers, for large communities the list of providers for a content key (CID) is long, so the response to `getProviders` would be a big list. Although if there is a limit to how many providers are in the response then this won't be a bandwidth issue, maybe a storage issue (for storing that long list)!?

`C` the number of content pieces/keys. This variable in our setting increases with time (in weeks) and so we should model it as a function of time since the beginning of each community. Each community would have about 52 archive files/CIDs a year; multiply this by the number of communities to get `C`. See $C_{com}$ and $C_{all}$ described previously.

`T` provider refresh time interval. Looking at the bittorrent docs, this value is about 15-30min with a TTL of about 2hr, but in libp2p this value is much higher, around 22h with a TTL of 48hr. In Codex, the blockExchange protocol advertizes every 30min with a TTL of 24hr. The range is then 30min-24h for `T`.

`Q` the query rate per-content. This I would expect to be small in our setting but depends on how often nodes/community members go offline. In a stable state, every member has the archive and there are no queries. However, as stated earlier there are two cases in which nodes/community members would query the archive data (or parts of it): (1) When joining the community you request the whole archive. (2) When a node go offline for a while and then comes back then the request would be only for part of the archive and how many parts (CIDs) depend on how long it has been offline. The first situation is easier to estimate than the second. We can try to experess this as a function of size of the community `P`, Let's give it a try here:
`Q` the query rate per-content. This I would expect to be small in our setting, but it depends on how often nodes/community members go offline. In a stable state, every member has the archive and there are no queries. However, as stated earlier there are two cases in which nodes/community members would query the archive data (or parts of it): (1) When joining the community you request the whole archive. (2) When a node goes offline for a while (more than 1 week) and then comes back, the request would be only for part of the archive, and how many parts (CIDs) depends on how long it has been offline. The first situation is easier to estimate than the second. We can try to express this as a function of the size of the community `P`. Let's give it a try here:
We can consider these factors:
- $\lambda_{\text{join}}$ : community-wide new join rate (joins per second)
- $\lambda_{\text{churn}}$ : per-provider churn rate (avg rejoins per second)
@@ -269,7 +270,7 @@ So given that we expect $P \cdot \lambda_{\text{churn}}$ providers to rejoin per

Now with `Q` we want the query rate **per-content** so we divide by `C` to get: $P \cdot \lambda_{\text{churn}} \cdot \theta_{\text{re}}$

Therefore, the query rate can be expressed as:
Therefore the query rate can be expressed as:
$$
Q \;=\; Q_{\text{join}} + Q_{\text{churn}}
\;=\; \lambda_{\text{join}} \;+\; P\,\lambda_{\text{churn}}\,\theta_{\text{re}}.
@@ -296,10 +297,11 @@
\text{Bandwidth}_{\text{maint}} = 6.67 + 48.2 \log_{2}(N)
$$

In the formula we have one variable which is `N` number of nodes. We can vary `N` from $2^4 - 2^5$ and see the expected traffic in the following figures (one per-sec and one per-day).
In the formula we have one variable which is `N` number of nodes. We can vary `N` from $10^4 - 10^5$ and see the expected traffic in the following figures (one per-sec and one per-day).



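As a quick sanity check on the two endpoints of that range (pure arithmetic on the formula above): for $N = 10^4$ the maintenance cost is $6.67 + 48.2 \log_2(10^4) \approx 647$ bytes/sec $\approx 0.63$ KB/sec, and for $N = 10^5$ it is $\approx 807$ bytes/sec $\approx 0.79$ KB/sec, consistent with the maintenance share in the breakdown tables further below.
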
#### Advertise Bandwidth
The simplified formula for traffic is:
$$
@@ -307,20 +309,22 @@
$$

We have three variables here ($C, T, N$), but ideally we would like the x-axis to represent the number of contents (archive files) $C$ so that we see how increasing the number of archive CIDs affects the DHT. So, we can fix the other variables to known/used values. We can consider:
- $N$ to be either $2^4$ or $2^5$
- $N$ to be either $10^4$ or $10^5$
- $T$ to be either 30min or 22hr

In addition to the previous, we can represent x-axis as age of the community in weeks (`H`) and see how the traffic grows with the age of the community. See the following figure:


We can clearly see the effect of the short re-advertise rate (30min) in the figure above!

|
||||

|
||||
|
||||
|
||||
#### Query Bandwidth
|
||||
The last category of traffic is the query traffic with the formula:
|
||||
$$
|
||||
\text{Bandwidth}_{\text{query}} = C\,Q\,\big(14460 \,\log_{2}(N) + 528 + 4880P\big)
|
||||
\text{Bandwidth}_{\text{query}} = \frac{ C\,Q\,\big(14460 \,\log_{2}(N) + 528 + 4880P\big)}{N}
|
||||
$$
|
||||
where $Q$ is:
|
||||
$$
|
||||
@ -329,9 +333,9 @@ $$
|
||||
|
||||
This formula contains more variables than previous ones, and plotting for all would not be useful, so we can try to turn some of them into constants:
|
||||
- We can consider 3 community sizes: small (10 members), mid (250 members), and large (1000 members). So $P = \{10,250,1000\}$
|
||||
- We can use $N =\{2^4,2^5\}$ , as we did before.
|
||||
- We can use $N =\{10^4,10^5\}$ , as we did before.
|
||||
- We can calculate $Q$ based on these assumptions:
|
||||
- The join rate ($\lambda_{\text{join}}$) can be expressed as $\frac{0.05P}{7}$ meaning 5% of $P$ will join a week. This is just an assumption but it takes into account the size of the community so the larger the community (more popular), the more new joiners we can expect. We then divide that by $86400$ to get per-sec rate.
|
||||
- The join rate ($\lambda_{\text{join}}$) can be expressed as $\frac{0.05P}{7}$ meaning 5% of $P$ will join a week. This is just an assumption but takes into account the size of the community so the larger the community (more popular), the more new joiners we can expect. We then divide that by $86400$ to get per-sec rate.
|
||||
- Churn rate ($\lambda_{\text{churn}}$) only makes sense in this setting if the provider (community member) is offline for more than a week since the archive is updated on a weekly basis. Let's assume 10% of $P$ will request one archive file a week (+index) which gives us $(\frac{0.1}{7}*P*\frac{2}{C})$
|
||||
- Based on the previous assumptions, we can then calculate $Q$ per-sec as:
|
||||
$Q = \frac{0.05P}{7*86400} + \frac{0.1}{7*86400}*P*\frac{2}{C}$
|
||||
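A minimal sketch of this query-rate assumption (names here are mine; it is equivalent to the `Q_per_content` helper in the scripts above):

```python
WEEK = 7 * 86400.0  # seconds per week

# Q assumption above: 5% of P join per week, plus 10% of P returning
# per week and fetching ~2 of the C archive items.
def query_rate_per_content(P, C):
    q_join = 0.05 * P / WEEK
    q_churn = (0.10 * P / WEEK) * (2.0 / C)
    return q_join + q_churn

# e.g. query_rate_per_content(1000, 53) ≈ 8.9e-05 queries/sec per content
```
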
@@ -345,6 +349,7 @@ Using the same assumptions on the variable ranges, we can see the total expected




The above figure combines all expected bandwidth and in general it shows:
- Bandwidth is not much affected by the number of nodes $N$
- The re-advertize time interval plays a significant role in increasing the bandwidth.
@@ -356,129 +361,130 @@ To understand which traffic category plays a major role here, let's try to measu
### N = 10,000, T = 30 min

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 3.592e3 | 18.02% | 81.81% | 0.17% |
| 52 | 53 | 6.426e3 | 10.07% | 89.75% | 0.18% |
| 104 | 105 | 12.095e3 | 5.35% | 94.47% | 0.18% |
| 208 | 209 | 23.434e3 | 2.76% | 97.06% | 0.18% |
| 416 | 417 | 46.111e3 | 1.40% | 98.41% | 0.18% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.501 | 18.05% | 81.95% | 0.00% |
| 53 | 6.264 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.00% |
| 209 | 22.843 | 2.77% | 97.23% | 0.00% |
| 417 | 44.948 | 1.41% | 98.59% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 4.491e3 | 14.41% | 65.43% | 20.16% |
| 52 | 53 | 8.079e3 | 8.01% | 71.39% | 20.60% |
| 104 | 105 | 15.256e3 | 4.24% | 74.90% | 20.86% |
| 208 | 209 | 29.610e3 | 2.19% | 76.81% | 21.00% |
| 416 | 417 | 58.319e3 | 1.11% | 77.81% | 21.08% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.501 | 18.05% | 81.95% | 0.00% |
| 53 | 6.265 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.00% |
| 209 | 22.844 | 2.77% | 97.23% | 0.00% |
| 417 | 44.949 | 1.41% | 98.59% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 16.586e3 | 3.90% | 17.72% | 78.38% |
| 52 | 53 | 30.319e3 | 2.13% | 19.02% | 78.84% |
| 104 | 105 | 57.785e3 | 1.12% | 19.77% | 79.11% |
| 208 | 209 | 112.717e3 | 0.57% | 20.18% | 79.25% |
| 416 | 417 | 222.581e3 | 0.29% | 20.39% | 79.32% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 3.502 | 18.05% | 81.95% | 0.00% |
| 53 | 6.265 | 10.09% | 89.91% | 0.00% |
| 105 | 11.791 | 5.36% | 94.64% | 0.01% |
| 209 | 22.844 | 2.77% | 97.23% | 0.01% |
| 417 | 44.951 | 1.41% | 98.59% | 0.01% |

---
### N = 10,000, T = 22 h

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 720.105 | 89.87% | 9.27% | 0.86% |
| 52 | 53 | 789.601 | 81.96% | 16.60% | 1.44% |
| 104 | 105 | 928.592 | 69.69% | 27.97% | 2.34% |
| 208 | 209 | 1.207e3 | 53.63% | 42.84% | 3.52% |
| 416 | 417 | 1.763e3 | 36.72% | 58.52% | 4.77% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.65% | 9.35% | 0.00% |
| 53 | 0.760 | 83.16% | 16.84% | 0.00% |
| 105 | 0.886 | 71.36% | 28.64% | 0.00% |
| 209 | 1.137 | 55.59% | 44.41% | 0.00% |
| 417 | 1.639 | 38.55% | 61.45% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 1.619e3 | 39.97% | 4.12% | 55.90% |
| 52 | 53 | 2.442e3 | 26.50% | 5.37% | 68.14% |
| 104 | 105 | 4.089e3 | 15.83% | 6.35% | 77.82% |
| 208 | 209 | 7.383e3 | 8.77% | 7.00% | 84.23% |
| 416 | 417 | 13.970e3 | 4.63% | 7.38% | 87.99% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.64% | 9.35% | 0.01% |
| 53 | 0.760 | 83.15% | 16.84% | 0.01% |
| 105 | 0.886 | 71.35% | 28.63% | 0.02% |
| 209 | 1.137 | 55.58% | 44.40% | 0.03% |
| 417 | 1.640 | 38.54% | 61.42% | 0.03% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 13.714e3 | 4.72% | 0.49% | 94.79% |
| 52 | 53 | 24.682e3 | 2.62% | 0.53% | 96.85% |
| 104 | 105 | 46.618e3 | 1.39% | 0.56% | 98.05% |
| 208 | 209 | 90.489e3 | 0.72% | 0.57% | 98.71% |
| 416 | 417 | 178.232e3 | 0.36% | 0.58% | 99.06% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.697 | 90.62% | 9.35% | 0.02% |
| 53 | 0.760 | 83.12% | 16.84% | 0.04% |
| 105 | 0.886 | 71.31% | 28.62% | 0.07% |
| 209 | 1.138 | 55.54% | 44.36% | 0.10% |
| 417 | 1.641 | 38.50% | 61.36% | 0.14% |


---
### N = 100,000, T = 30 min

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 4.473e3 | 18.05% | 81.79% | 0.17% |
| 52 | 53 | 8.003e3 | 10.09% | 89.74% | 0.17% |
| 104 | 105 | 15.062e3 | 5.36% | 94.47% | 0.17% |
| 208 | 209 | 29.180e3 | 2.77% | 97.06% | 0.17% |
| 416 | 417 | 57.416e3 | 1.41% | 98.42% | 0.18% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.683 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 5.402e3 | 14.94% | 67.73% | 17.32% |
| 52 | 53 | 9.710e3 | 8.31% | 73.96% | 17.72% |
| 104 | 105 | 18.327e3 | 4.40% | 77.64% | 17.96% |
| 208 | 209 | 35.559e3 | 2.27% | 79.65% | 18.08% |
| 416 | 417 | 70.025e3 | 1.15% | 80.70% | 18.15% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.683 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 17.590e3 | 4.59% | 20.80% | 74.61% |
| 52 | 53 | 32.120e3 | 2.51% | 22.36% | 75.13% |
| 104 | 105 | 61.180e3 | 1.32% | 23.26% | 75.42% |
| 208 | 209 | 119.300e3 | 0.68% | 23.74% | 75.58% |
| 416 | 417 | 235.541e3 | 0.34% | 23.99% | 75.67% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 4.361 | 18.08% | 81.92% | 0.00% |
| 53 | 7.802 | 10.10% | 89.90% | 0.00% |
| 105 | 14.684 | 5.37% | 94.63% | 0.00% |
| 209 | 28.446 | 2.77% | 97.23% | 0.00% |
| 417 | 55.972 | 1.41% | 98.59% | 0.00% |


---
### N = 100,000, T = 22 h

**small (P=10)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 897.828 | 89.91% | 9.26% | 0.83% |
| 52 | 53 | 984.126 | 82.03% | 16.59% | 1.39% |
| 104 | 105 | 1.157e3 | 69.79% | 27.96% | 2.26% |
| 208 | 209 | 1.502e3 | 53.75% | 42.86% | 3.39% |
| 416 | 417 | 2.192e3 | 36.82% | 58.58% | 4.60% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.40% | 28.60% | 0.00% |
| 209 | 1.417 | 55.64% | 44.36% | 0.00% |
| 417 | 2.043 | 38.60% | 61.40% | 0.00% |

**mid (P=250)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 1.826e3 | 44.20% | 4.55% | 51.25% |
| 52 | 53 | 2.691e3 | 29.99% | 6.07% | 63.94% |
| 104 | 105 | 4.421e3 | 18.26% | 7.31% | 74.43% |
| 208 | 209 | 7.881e3 | 10.24% | 8.17% | 81.59% |
| 416 | 417 | 14.801e3 | 5.45% | 8.68% | 85.87% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.40% | 28.60% | 0.00% |
| 209 | 1.417 | 55.64% | 44.36% | 0.00% |
| 417 | 2.043 | 38.60% | 61.40% | 0.00% |

**large (P=1000)**
| H (weeks) | C (=H+1) | Total (B/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|---:|
| 26 | 27 | 14.014e3 | 5.76% | 0.59% | 93.65% |
| 52 | 53 | 25.101e3 | 3.22% | 0.65% | 96.13% |
| 104 | 105 | 47.275e3 | 1.71% | 0.68% | 97.61% |
| 208 | 209 | 91.622e3 | 0.88% | 0.70% | 98.42% |
| 416 | 417 | 180.317e3 | 0.45% | 0.71% | 98.84% |
| C (=H+1) | Total (KB/s) | Maint % | Adv % | Query % |
|---:|---:|---:|---:|---:|
| 27 | 0.870 | 90.66% | 9.34% | 0.00% |
| 53 | 0.948 | 83.18% | 16.82% | 0.00% |
| 105 | 1.104 | 71.39% | 28.60% | 0.01% |
| 209 | 1.417 | 55.63% | 44.36% | 0.01% |
| 417 | 2.043 | 38.59% | 61.40% | 0.01% |


## Conclusion & Future Work
From the results, we can observe a few things:
- For small/mid size communities, the readvertise is the main factor so increasing the time interval $T$ to readvertise would save a lot of bandwidth.
- For large size communities, the query rate actually dominates so increasing $T$ alone won't save you there, so in this setting it might make more sense to reduce the number of providers, i.e. not all community members must store the whole archive, a subset would do the job given the large size of the community, 25-50% of the community would be suffcient although one must be careful with the churn rate in large communities.
- For large communities with large number of providers, we assume the response to `getProviders` would a long list of providers, but it would save a lot of bandwidth to simply cap the response to say $15$ providers.
- For short TTL (T=30min), the readvertise is the main factor, so increasing the time interval $T$ to readvertise would save a lot of bandwidth. For long TTL (T=22hr), the maintenance rate actually dominates. In this setting it might make more sense to reduce the number of providers, i.e. not all community members must store the whole archive; a subset would do the job given the large size of the community. 25-50% of the community would be sufficient, although one must be careful with the churn rate in large communities and the fact that about 80% of the members use lightweight devices, i.e. they don't serve the archive.

It is difficult to conclude with a concrete number for the lower/upper bound of the size of the communities and number of communities simply because there are multiple variable that need to be taken into account. However, our results gives an estimate of the expected bandwidth a provider/member would have to handle in different settings: community size (small/mid/large), readvertise time interval ($T$).
It is difficult to conclude with a concrete number for the lower/upper bound of the size of the communities and the number of communities, simply because there are multiple variables that need to be taken into account. However, our results give an estimate of the expected bandwidth a provider/member would have to handle in different settings: community size (small/mid/large), readvertise time interval ($T$). Overall, I don't see big warning signs so far and the numbers seem ok. For instance, being a member of 10 large communities, each 1 year old (T = 30min), you would expect about 64KB/s, which is much less than streaming a movie; maybe with 100 communities it would be equal to that.

For future work we can look into:
- Consider churn-rate and provide better estimate for the join rate based on real measured data, e.g. based on status communities or other similar apps.
@@ -487,3 +493,4 @@ For future work we can look into:
- Move to one append-only archive file
- Merging & bundling at a longer time interval
- Consider mutable data CIDs
