From 3ef22a70bc47b7318a7d4fd4f75dbc17cfd9fb0d Mon Sep 17 00:00:00 2001
From: gmega <giuliano.mega@gmail.com>
Date: Thu, 7 Sep 2023 19:07:32 -0300
Subject: [PATCH] initial import

---
 analysis/swarm-overlay-sims/.Rbuildignore     |   4 +
 analysis/swarm-overlay-sims/.gitignore        |   9 +
 analysis/swarm-overlay-sims/DESCRIPTION       |  25 ++
 analysis/swarm-overlay-sims/R/dissemination.R |  12 +
 analysis/swarm-overlay-sims/R/swarmoverlay.R  |  31 ++
 analysis/swarm-overlay-sims/R/utils.R         |  33 ++
 analysis/swarm-overlay-sims/bibliography.bib  |  23 ++
 analysis/swarm-overlay-sims/ieee.csl          | 340 ++++++++++++++++++
 .../swarm-overlay-sims/renv/settings.json     |  19 +
 .../swarm-overlay-sim.Rproj                   |  17 +
 analysis/swarm-overlay-sims/swarms.Rmd        | 222 ++++++++++++
 11 files changed, 735 insertions(+)
 create mode 100644 analysis/swarm-overlay-sims/.Rbuildignore
 create mode 100644 analysis/swarm-overlay-sims/.gitignore
 create mode 100644 analysis/swarm-overlay-sims/DESCRIPTION
 create mode 100644 analysis/swarm-overlay-sims/R/dissemination.R
 create mode 100644 analysis/swarm-overlay-sims/R/swarmoverlay.R
 create mode 100644 analysis/swarm-overlay-sims/R/utils.R
 create mode 100644 analysis/swarm-overlay-sims/bibliography.bib
 create mode 100644 analysis/swarm-overlay-sims/ieee.csl
 create mode 100644 analysis/swarm-overlay-sims/renv/settings.json
 create mode 100644 analysis/swarm-overlay-sims/swarm-overlay-sim.Rproj
 create mode 100644 analysis/swarm-overlay-sims/swarms.Rmd

diff --git a/analysis/swarm-overlay-sims/.Rbuildignore b/analysis/swarm-overlay-sims/.Rbuildignore
new file mode 100644
index 0000000..d821302
--- /dev/null
+++ b/analysis/swarm-overlay-sims/.Rbuildignore
@@ -0,0 +1,4 @@
+^renv$
+^renv\.lock$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/analysis/swarm-overlay-sims/.gitignore b/analysis/swarm-overlay-sims/.gitignore
new file mode 100644
index 0000000..b56e176
--- /dev/null
+++ b/analysis/swarm-overlay-sims/.gitignore
@@ -0,0 +1,9 @@
+.Rproj.user
+.RData
+.Rhistory
+*.html
+rsconnect
+libs
+data
+*cache
+*files
\ No newline at end of file
diff --git a/analysis/swarm-overlay-sims/DESCRIPTION b/analysis/swarm-overlay-sims/DESCRIPTION
new file mode 100644
index 0000000..48ec927
--- /dev/null
+++ b/analysis/swarm-overlay-sims/DESCRIPTION
@@ -0,0 +1,25 @@
+Package: swarm-overlay-sims
+Title: Swarm Overlay Simulations
+Version: 0.0.0.9000
+Description: Simple Simulations for Swarm Overlays
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+Depends: 
+  tidyverse (>= 2.0.0),
+  purrr (>= 1.0.1),
+  R6 (>= 2.2.2),
+  igraph (>= 1.5.1),
+  ggraph,
+  glue,
+  itertools,
+  bookdown,
+  Hmisc,
+  playaxr,
+  plotly,
+  DT
+Remotes: gmega/playaxr
+Suggests:
+  devtools,
+  testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/analysis/swarm-overlay-sims/R/dissemination.R b/analysis/swarm-overlay-sims/R/dissemination.R
new file mode 100644
index 0000000..dd55ef7
--- /dev/null
+++ b/analysis/swarm-overlay-sims/R/dissemination.R
@@ -0,0 +1,12 @@
+# ---- disseminate-broadcast ----
+disseminate_broadcast <- function(overlay, sources) {
+  # Hop count from a single source to every vertex, via breadth-first search.
+  hops_from <- function(origin_name) {
+    origin <- V(overlay)[name == origin_name]
+    bfs(overlay, root = origin, dist = TRUE)$dist
+  }
+  # Each vertex is reached first by its closest source, so keep the
+  # element-wise minimum over the per-source hop counts.
+  hops <- lapply(sources, hops_from)
+  do.call(pmin, hops)
+}
\ No newline at end of file
diff --git a/analysis/swarm-overlay-sims/R/swarmoverlay.R b/analysis/swarm-overlay-sims/R/swarmoverlay.R
new file mode 100644
index 0000000..7dbbb2b
--- /dev/null
+++ b/analysis/swarm-overlay-sims/R/swarmoverlay.R
@@ -0,0 +1,31 @@
+
+# ---- swarm-overlay ----
+swarm_overlay <- function(n, d, names = FALSE, directed = FALSE) {
+  # Draw a random edge list for n nodes, then turn it into an igraph graph.
+  edges <- swarm_overlay_edgelist(n, d)
+  as_overlay_graph(edges, names = names, directed = directed)
+}
+
+as_overlay_graph <- function(edge_list, names = FALSE, directed = FALSE) {
+  # With names = TRUE the vertices are listed explicitly as 1..max(from);
+  # otherwise igraph derives the vertex set from the edge list itself.
+  vs <- NULL
+  if (names) vs <- tibble(name = 1:max(edge_list$from))
+  igraph::graph_from_data_frame(edge_list, directed = directed, vertices = vs)
+}
+
+swarm_overlay_edgelist <- function(n, d) { # nodes 2..n; no edges when n < 2
+  map(seq_len(max(n - 1L, 0L)) + 1L, \(i) node_edges(i, d)) |> bind_rows()
+}
+
+node_edges <- function(i, d) {
+  # Node i picks d distinct peers among the i - 1 nodes that joined before it;
+  # when i <= d there are fewer than d of them, so it connects to all.
+  earlier <- 1:(i - 1)
+  peers <- if (i <= d) {
+    earlier
+  } else {
+    sample(earlier, d, replace = FALSE)
+  }
+  tibble(from = i, to = peers)
+}
+
diff --git a/analysis/swarm-overlay-sims/R/utils.R b/analysis/swarm-overlay-sims/R/utils.R
new file mode 100644
index 0000000..400ac2f
--- /dev/null
+++ b/analysis/swarm-overlay-sims/R/utils.R
@@ -0,0 +1,33 @@
+quantile_df <- function(x, probs = c(0.25, 0.5, 0.75)) {
+  # One row per requested probability: the quantile of x (NAs ignored)
+  # next to the probability it corresponds to.
+  values <- quantile(x, probs, na.rm = TRUE)
+  tibble(val = values, quant = probs)
+}
+
+formatted_factor <- function(x, formatter) {
+  # Label x with formatter(), ordering levels by raw value rather than label.
+  raw <- unique(x)
+  factor(formatter(x), levels = formatter(raw)[order(raw)])
+}
+
+dataset <- function(symbol, block, storage = "csv", reload = FALSE) {
+  varname <- deparse(substitute(symbol)) # the symbol's name is the dataset key
+  env <- rlang::caller_env() # bind the dataset in the caller's frame
+  if ((varname %in% names(env)) && !reload) {
+    message("Dataset already loaded.")
+    return(invisible(NULL)) # a bare return() prints NULL at top level
+  }
+  fname <- glue('./data/{varname}.{storage}')
+  env[[varname]] <- if (file.exists(fname)) {
+    message(glue("Reading cached dataset from {fname}"))
+    read_csv(fname, show_col_types = FALSE)
+  } else {
+    message("Evaluating dataset expression.")
+    if (!dir.exists("./data")) dir.create("./data")
+    contents <- block # lazy argument: only evaluated on a cache miss
+    message(glue("Write dataset {fname}."))
+    write_csv(contents, file = fname)
+    contents
+  }
+}
\ No newline at end of file
diff --git a/analysis/swarm-overlay-sims/bibliography.bib b/analysis/swarm-overlay-sims/bibliography.bib
new file mode 100644
index 0000000..cf15690
--- /dev/null
+++ b/analysis/swarm-overlay-sims/bibliography.bib
@@ -0,0 +1,23 @@
+@article{hartmann-18,
+  title = {Distribution of diameters for Erd\ifmmode \mbox{\H{o}}\else \H{o}\fi{}s-R\'enyi random graphs},
+  author = {Hartmann, A. K. and M\'ezard, M.},
+  journal = {Phys. Rev. E},
+  volume = {97},
+  issue = {3},
+  year = {2018},
+  month = {Mar},
+  publisher = {American Physical Society},
+  url = {https://link.aps.org/doi/10.1103/PhysRevE.97.032128}
+}
+
+@article{hartmann-02,
+  title = {Sampling rare events: Statistics of local sequence alignments},
+  author = {Hartmann, Alexander K.},
+  journal = {Phys. Rev. E},
+  volume = {65},
+  issue = {5},
+  year = {2002},
+  month = {Apr},
+  publisher = {American Physical Society},
+  url = {https://link.aps.org/doi/10.1103/PhysRevE.65.056102}
+}
diff --git a/analysis/swarm-overlay-sims/ieee.csl b/analysis/swarm-overlay-sims/ieee.csl
new file mode 100644
index 0000000..cb5ab66
--- /dev/null
+++ b/analysis/swarm-overlay-sims/ieee.csl
@@ -0,0 +1,340 @@
+<?xml version="1.0" encoding="utf-8"?>
+<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only">
+  <info>
+    <title>IEEE</title>
+    <id>http://www.zotero.org/styles/ieee</id>
+    <link href="http://www.zotero.org/styles/ieee" rel="self"/>
+    <link href="http://www.ieee.org/documents/style_manual.pdf" rel="documentation"/>
+    <link href="http://www.ieee.org/documents/auinfo07.pdf" rel="documentation"/>
+    <author>
+      <name>Michael Berkowitz</name>
+      <email>mberkowi@gmu.edu</email>
+    </author>
+    <contributor>
+      <name>Julian Onions</name>
+      <email>julian.onions@gmail.com</email>
+    </contributor>
+    <contributor>
+      <name>Rintze Zelle</name>
+      <uri>http://twitter.com/rintzezelle</uri>
+    </contributor>
+    <contributor>
+      <name>Stephen Frank</name>
+      <uri>http://www.zotero.org/sfrank</uri>
+    </contributor>
+    <contributor>
+      <name>Sebastian Karcher</name>
+    </contributor>
+    <category citation-format="numeric"/>
+    <category field="engineering"/>
+    <category field="generic-base"/>
+    <updated>2016-10-06T15:32:30+00:00</updated>
+    <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
+  </info>
+  <locale xml:lang="en">
+    <terms>
+      <term name="chapter" form="short">ch.</term>
+      <term name="presented at">presented at the</term>
+      <term name="available at">available</term>
+    </terms>
+  </locale>
+  <!-- Macros -->
+  <macro name="edition">
+    <choose>
+      <if type="bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="any">
+        <choose>
+          <if is-numeric="edition">
+            <group delimiter=" ">
+              <number variable="edition" form="ordinal"/>
+              <text term="edition" form="short"/>
+            </group>
+          </if>
+          <else>
+            <text variable="edition" text-case="capitalize-first" suffix="."/>
+          </else>
+        </choose>
+      </if>
+    </choose>
+  </macro>
+  <macro name="issued">
+    <choose>
+      <if type="article-journal report" match="any">
+        <date variable="issued">
+          <date-part name="month" form="short" suffix=" "/>
+          <date-part name="year" form="long"/>
+        </date>
+      </if>
+      <else-if type="bill book chapter graphic legal_case legislation motion_picture paper-conference song thesis" match="any">
+        <date variable="issued">
+          <date-part name="year" form="long"/>
+        </date>
+      </else-if>
+      <else>
+        <date variable="issued">
+          <date-part name="day" form="numeric-leading-zeros" suffix="-"/>
+          <date-part name="month" form="short" suffix="-" strip-periods="true"/>
+          <date-part name="year" form="long"/>
+        </date>
+      </else>
+    </choose>
+  </macro>
+  <macro name="author">
+    <names variable="author">
+      <name and="text" et-al-min="7" et-al-use-first="1" initialize-with=". "/>
+      <label form="short" prefix=", " text-case="capitalize-first"/>
+      <et-al font-style="italic"/>
+      <substitute>
+        <names variable="editor"/>
+        <names variable="translator"/>
+      </substitute>
+    </names>
+  </macro>
+  <macro name="editor">
+    <names variable="editor">
+      <name initialize-with=". " delimiter=", " and="text"/>
+      <label form="short" prefix=", " text-case="capitalize-first"/>
+    </names>
+  </macro>
+  <macro name="locators">
+    <group delimiter=", ">
+      <text macro="edition"/>
+      <group delimiter=" ">
+        <text term="volume" form="short"/>
+        <number variable="volume" form="numeric"/>
+      </group>
+      <group delimiter=" ">
+        <number variable="number-of-volumes" form="numeric"/>
+        <text term="volume" form="short" plural="true"/>
+      </group>
+      <group delimiter=" ">
+        <text term="issue" form="short"/>
+        <number variable="issue" form="numeric"/>
+      </group>
+    </group>
+  </macro>
+  <macro name="title">
+    <choose>
+      <if type="bill book graphic legal_case legislation motion_picture song" match="any">
+        <text variable="title" font-style="italic"/>
+      </if>
+      <else>
+        <text variable="title" quotes="true"/>
+      </else>
+    </choose>
+  </macro>
+  <macro name="publisher">
+    <choose>
+      <if type="bill book chapter graphic legal_case legislation motion_picture paper-conference song" match="any">
+        <group delimiter=": ">
+          <text variable="publisher-place"/>
+          <text variable="publisher"/>
+        </group>
+      </if>
+      <else>
+        <group delimiter=", ">
+          <text variable="publisher"/>
+          <text variable="publisher-place"/>
+        </group>
+      </else>
+    </choose>
+  </macro>
+  <macro name="event">
+    <choose>
+      <if type="paper-conference speech" match="any">
+        <choose>
+          <!-- Published Conference Paper -->
+          <if variable="container-title">
+            <group delimiter=", ">
+              <group delimiter=" ">
+                <text term="in"/>
+                <text variable="container-title" font-style="italic"/>
+              </group>
+              <text variable="event-place"/>
+            </group>
+          </if>
+          <!-- Unpublished Conference Paper -->
+          <else>
+            <group delimiter=", ">
+              <group delimiter=" ">
+                <text term="presented at"/>
+                <text variable="event"/>
+              </group>
+              <text variable="event-place"/>
+            </group>
+          </else>
+        </choose>
+      </if>
+    </choose>
+  </macro>
+  <macro name="access">
+    <choose>
+      <if type="webpage">
+        <choose>
+          <if variable="URL">
+            <group delimiter=". ">
+              <text term="online" prefix="[" suffix="]" text-case="capitalize-first"/>
+              <group delimiter=": ">
+                <text term="available at" text-case="capitalize-first"/>
+                <text variable="URL"/>
+              </group>
+              <group prefix="[" suffix="]" delimiter=": ">
+                <text term="accessed" text-case="capitalize-first"/>
+                <date variable="accessed">
+                  <date-part name="day" form="numeric-leading-zeros" suffix="-"/>
+                  <date-part name="month" form="short" suffix="-" strip-periods="true"/>
+                  <date-part name="year" form="long"/>
+                </date>
+              </group>
+            </group>
+          </if>
+        </choose>
+      </if>
+    </choose>
+  </macro>
+  <macro name="page">
+    <group>
+      <label variable="page" form="short" suffix=" "/>
+      <text variable="page"/>
+    </group>
+  </macro>
+  <macro name="citation-locator">
+    <group delimiter=" ">
+      <choose>
+        <if locator="page">
+          <label variable="locator" form="short"/>
+        </if>
+        <else>
+          <label variable="locator" form="short" text-case="capitalize-first"/>
+        </else>
+      </choose>
+      <text variable="locator"/>
+    </group>
+  </macro>
+  <!-- Citation -->
+  <citation collapse="citation-number">
+    <sort>
+      <key variable="citation-number"/>
+    </sort>
+    <layout delimiter=", ">
+      <group prefix="[" suffix="]" delimiter=", ">
+        <text variable="citation-number"/>
+        <text macro="citation-locator"/>
+      </group>
+    </layout>
+  </citation>
+  <!-- Bibliography -->
+  <bibliography entry-spacing="0" second-field-align="flush">
+    <layout suffix=".">
+      <!-- Citation Number -->
+      <text variable="citation-number" prefix="[" suffix="]"/>
+      <!-- Author(s) -->
+      <text macro="author" suffix=", "/>
+      <!-- Rest of Citation -->
+      <choose>
+        <!-- Specific Formats -->
+        <if type="article-journal">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text variable="container-title" font-style="italic" form="short"/>
+            <text macro="locators"/>
+            <text macro="page"/>
+            <text macro="issued"/>
+          </group>
+        </if>
+        <else-if type="paper-conference speech" match="any">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text macro="event"/>
+            <text macro="issued"/>
+            <text macro="locators"/>
+            <text macro="page"/>
+          </group>
+        </else-if>
+        <else-if type="report">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text macro="publisher"/>
+            <group delimiter=" ">
+              <text variable="genre"/>
+              <text variable="number"/>
+            </group>
+            <text macro="issued"/>
+          </group>
+        </else-if>
+        <else-if type="thesis">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text variable="genre"/>
+            <text macro="publisher"/>
+            <text macro="issued"/>
+          </group>
+        </else-if>
+        <else-if type="webpage post-weblog" match="any">
+          <group delimiter=", " suffix=". ">
+            <text macro="title"/>
+            <text variable="container-title" font-style="italic"/>
+            <text macro="issued"/>
+          </group>
+          <text macro="access"/>
+        </else-if>
+        <else-if type="patent">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text variable="number"/>
+            <text macro="issued"/>
+          </group>
+        </else-if>
+        <!-- Generic/Fallback Formats -->
+        <else-if type="bill book graphic legal_case legislation motion_picture report song" match="any">
+          <group delimiter=", " suffix=". ">
+            <text macro="title"/>
+            <text macro="locators"/>
+          </group>
+          <group delimiter=", ">
+            <text macro="publisher"/>
+            <text macro="issued"/>
+            <text macro="page"/>
+          </group>
+        </else-if>
+        <else-if type="article-magazine article-newspaper broadcast interview manuscript map patent personal_communication song speech thesis webpage" match="any">
+          <group delimiter=", ">
+            <text macro="title"/>
+            <text variable="container-title" font-style="italic"/>
+            <text macro="locators"/>
+            <text macro="publisher"/>
+            <text macro="page"/>
+            <text macro="issued"/>
+          </group>
+        </else-if>
+        <else-if type="chapter paper-conference" match="any">
+          <group delimiter=", " suffix=", ">
+            <text macro="title"/>
+            <group delimiter=" ">
+              <text term="in"/>
+              <text variable="container-title" font-style="italic"/>
+            </group>
+            <text macro="locators"/>
+          </group>
+          <text macro="editor" suffix=" "/>
+          <group delimiter=", ">
+            <text macro="publisher"/>
+            <text macro="issued"/>
+            <text macro="page"/>
+          </group>
+        </else-if>
+        <else>
+          <group delimiter=", " suffix=". ">
+            <text macro="title"/>
+            <text variable="container-title" font-style="italic"/>
+            <text macro="locators"/>
+          </group>
+          <group delimiter=", ">
+            <text macro="publisher"/>
+            <text macro="page"/>
+            <text macro="issued"/>
+          </group>
+        </else>
+      </choose>
+    </layout>
+  </bibliography>
+</style>
diff --git a/analysis/swarm-overlay-sims/renv/settings.json b/analysis/swarm-overlay-sims/renv/settings.json
new file mode 100644
index 0000000..74c1d4b
--- /dev/null
+++ b/analysis/swarm-overlay-sims/renv/settings.json
@@ -0,0 +1,19 @@
+{
+  "bioconductor.version": null,
+  "external.libraries": [],
+  "ignored.packages": [],
+  "package.dependency.fields": [
+    "Imports",
+    "Depends",
+    "LinkingTo"
+  ],
+  "ppm.enabled": null,
+  "ppm.ignored.urls": [],
+  "r.version": null,
+  "snapshot.type": "explicit",
+  "use.cache": true,
+  "vcs.ignore.cellar": true,
+  "vcs.ignore.library": true,
+  "vcs.ignore.local": true,
+  "vcs.manage.ignores": true
+}
diff --git a/analysis/swarm-overlay-sims/swarm-overlay-sim.Rproj b/analysis/swarm-overlay-sims/swarm-overlay-sim.Rproj
new file mode 100644
index 0000000..21a4da0
--- /dev/null
+++ b/analysis/swarm-overlay-sims/swarm-overlay-sim.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/analysis/swarm-overlay-sims/swarms.Rmd b/analysis/swarm-overlay-sims/swarms.Rmd
new file mode 100644
index 0000000..d539824
--- /dev/null
+++ b/analysis/swarm-overlay-sims/swarms.Rmd
@@ -0,0 +1,222 @@
+---
+title: "Codex Swarm Overlays"
+output: 
+  bookdown::gitbook:
+    pandoc_args: [ "--csl", "ieee.csl" ]
+    split_by: none
+
+bibliography: [bibliography.bib]
+link-citations: true
+---
+
+
+```{r cache=FALSE, echo=FALSE, warning=FALSE, message=FALSE}
+knitr::read_chunk('R/swarmoverlay.R')
+knitr::read_chunk('R/dissemination.R')
+devtools::load_all()
+```
+
+# Context
+
+As our understanding of what we need to learn about our swarm protocol evolved, we realized that some questions may be both more important and more within reach than we expected.
+
+# Graph Structure
+
+Our protocol works by having a node join the network and ask a _bootstrap node_ for a random subset of size $d$ of the nodes that are currently in the swarm. Absent any dynamics, this should intuitively converge into a variant of the $G(n, p = 1/d)$ [Erdős-Rényi](https://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model) model, whose connectivity and diameters are well-studied.
+
+For instance, we know that if $p > (1+ \varepsilon )\ln n/n$, then $G$ is connected almost surely. Almost surely, however, means that the graph might be disconnected, particularly for small $n$. 
+
+The diameter of such graphs, on the other hand, is relatively poorly understood, with numeric results being published only relatively recently, for certain values of $p$ [@hartmann-18], and with no publicly available data or code for us to study the problem further.
+
+Furthermore, it is clear that our graphs will have a different degree distribution from Erdős-Rényi graphs, as the generating game for the graph is different -- nodes that enter the network early will tend to have higher degrees than nodes that enter the network late. This can lead to load imbalances, which we also intend to look into.
+
+# Initial Questions
+
+Given a network $N = \left\{o_1, \cdots, o_n\right\}$ with $n$ nodes:
+
+1. How often will there be disconnected clusters?
+2. What is the degree distribution for these graphs?
+3. How fast should we expect a block to _percolate_ over the network as we vary the proportions of storage nodes vs. downloader nodes?
+
+We can provide preliminary answers to these questions by means of simple simulations. 
+
+## Generating Overlay Samples
+
+The algorithm for generating overlay graphs -- which will be used throughout our experiments -- is shown below. The key is the `node_edges` procedure, where we simulate a node being bootstrapped from a replicated tracker with a subset of the nodes already in the swarm as its neighbors:
+
+```{r swarm-overlay}
+```
+
+Fig \@ref(fig:example-overlay) shows a sample overlay generated using the algorithm. Note that, as expected, nodes that enter the network earlier (have a lower id number) tend to exhibit higher degrees.
+
+```{r example-overlay, fig.cap="A sample of $G(15, 2)$.", warning=FALSE, fig.align="center"}
+swarm_overlay(15, 2, directed = TRUE) |> 
+  ggraph(layout = "stress") +
+  geom_edge_link(
+    arrow = arrow(length = unit(0.1, "inches")), 
+    end_cap = circle(3, 'mm')
+  ) + 
+  geom_node_point(size = 8) +
+  geom_node_text(aes(label = name), col = "white") +
+  theme_graph() +
+  set_graph_style(face = "bold")
+```
+
+## How Often Will There Be Disconnected Clusters?
+
+Theory from Erdős-Rényi graphs puts the connectivity threshold at $\frac{(1 + \epsilon) \ln n}{n}$, but our graphs are different enough that we can make more precise statements.
+
+**Theorem 1.** If edges are undirected, then $G(n, d)$ is _always_ connected. If edges are directed, on the other hand, then $G(n, d)$ is _never_ strongly connected.
+
+For part 1, the reasoning is inductive: assuming that $G(n - 1, d)$ forms a connected component, then $G(n, d)$ must also form a connected component as the $n^{th}$ node will have undirected edges into $G(n - 1, d)$.
+
+Part 2, on the other hand, follows trivially from the fact that node $1$, the first node in the network, has no outbound edges, and will therefore always be outside the strongly connected component of the graph. $\blacksquare$
+
+Because of the way we propose the protocol to work, we can assume our graphs to be undirected for the time being.
+
+## What is the degree distribution for these graphs?
+
+To try to get a grip on degree distributions, we will look into graphs $G_{i,j} = (V_i, E_j)$ where:
+
+$$
+|V_i| = 10 \times 2^i
+$$
+this means we start with a graph of size $10$ and then double its size for $i \in \{0, \cdots, 8\}$.
+
+```{r}
+v_i <- function(i) 10 * 2^i
+```
+
+We will then look at values for $d$ which range from $1$ to the critical threshold for percolation in Erdős-Rényi graphs, meaning:
+
+$$
+1 \leq d \leq \left\lceil\ln |V_i|\right\rceil
+$$
+
+with $d \in \mathbb{N}$.
+
+```{r}
+d_i <- function(i) 1:ceiling(log(v_i(i))) # base R's ceiling(); `ceil` is not base
+```
+
+Although more sophisticated sampling approaches are definitely possible [@hartmann-02], we will simply generate $500$ graphs for each configuration, and compute their empirical CDF. We can then use that to get percentiles. To make things a bit more efficient, we will pre-generate the edge lists. The simulation code is pasted next, and Figure \@ref(fig:edge-degrees) shows the percentiles of the degree distributions as a function of swarm size, faceted by $d \in \{1, 2, 3, 4\}$.
+
+```{r}
+n_samples <- 500
+```
+
+```{r}
+parameters <- chain(
+  map(0:8, function(i) product(v = v_i(i), d = d_i(i)) |> as.list())) |> 
+    as.list() |>
+    list_c()
+```
+
+```{r cache = TRUE, cache.lazy = FALSE}
+dataset(
+  edge_lists,
+  storage = 'csv.bz2',
+  map(parameters, function(parameter) {
+    parallel::mclapply(
+      1:n_samples,
+      mc.cores = 8, # Works On My Machine (tm)
+      mc.set.seed = TRUE, # make sure to re-seed the forked processes
+      function(i) {
+        swarm_overlay_edgelist(parameter$v, parameter$d) |> mutate(
+          v = parameter$v,
+          d = parameter$d,
+          instance = i
+        )
+      }
+    ) |> bind_rows()
+  }) |> bind_rows()
+)
+```
+
+```{r cache = TRUE, cache.lazy = FALSE}
+dataset(
+  edge_degrees,
+  edge_lists |> 
+    group_by(v, d, instance, to) |> 
+    count(name = 'degree') |>  
+    group_by(v, d) |>
+    reframe(quantile_df(degree, c(0, 0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 1))) |>
+    rename(degree = val)
+)
+```
+
+```{r edge-degrees, fig.cap="Edge degrees distribution percentiles as a function of swarm size, faceted by $d \\in \\{1, 2, 3, 4\\}$."}
+plotly::ggplotly(ggplot(edge_degrees |> filter(d < 5)) +
+  geom_line(aes(x = v, y = degree, col = formatted_factor(quant, function(x) glue('{x*100}')))) +
+  scale_x_log10() +
+  xlab('swarm size') +
+  theme_playax() +
+  labs(colour = "percentile") +
+  facet_grid(cols=vars(d)))
+```
+
+We can make three main conclusions from this:
+
+1. that the median ($50^{th}$ percentile) degree in the graph converges to $d$ and stays constant, regardless of the size of the swarm;
+2. that the variance increases with $d$, but marginally or not at all with the size of the swarm;
+3. that the maximum degree increases rapidly with $d$.
+
+For completeness, the data is shown in Table \@ref(fig:table-degrees).
+
+```{r table-degrees, fig.cap="Vertex degrees.", echo=FALSE}
+datatable(
+  edge_degrees |> 
+    arrange(v, d, desc(degree)) |>
+    mutate(quant = quant * 100) |>
+    rename(`swarm size` = v, `vertex degree` = degree, percentile = quant),
+  options = list(
+    dom = 'Brtip',
+    columnDefs = list(
+      list(targets = 0, visible = FALSE)
+    )
+  ),
+  filter = list(position = 'top'),
+) |>
+  formatCurrency(c('percentile'), currency = " th", before = FALSE, digits = 0)
+```
+
+
+This is all, to a certain degree, obvious, as the probability that a node gets selected as a neighbor is biased by its swarm lifetime, and points to the need to create some type of counterweight to reverse that bias. The obvious choice would be to make older nodes less likely to be chosen on bootstrap, but this could make the swarm easy to hijack (think adversary flooding the swarm with new nodes and taking it over).
+
+The less obvious choice would be to have nodes reject neighbor requests once a threshold is met, effectively truncating the tail of the degree distribution. This could make the bootstrap procedure more complex/slower as a node would have to request more nodes from the bootstrap service again. We will keep those in mind for the next iteration.
+
+##  How fast should we expect a block to _percolate_ over the network?
+
+In the absence of a link capacity and/or network delay model, graph topology should dominate dissemination time. The simplest case to analyse is to assume that each node is able to broadcast the packet to _all of its neighbors_. The main appeal is that this is easy to implement, and can already provide some insight.
+
+```{r disseminate-broadcast}
+```
+
+We will take the overlays we had from before and run a simple experiment where we pick $1, 2, 3$ and $4$ starting nodes chosen at random in the overlays, and compute the average dissemination times for those.
+
+```{r}
+n_sources_max <- 4
+```
+
+```{r eval=FALSE}
+map(parameters[1], function(parameter) {
+  map(1:n_sources_max, function(n_sources) {
+    latencies <- map(1:n_samples, function(instance) {
+      sources <- sample(1:parameter$v, size = n_sources, replace = FALSE)
+      latencies <- edge_lists |> 
+        filter(
+          d == parameter$d,
+          v == parameter$v,
+          instance == !!instance
+        ) |> 
+        as_overlay_graph() |>
+        disseminate_broadcast(sources)
+    }) |> 
+      list_c() |>
+      quantile_df(c(0, 0.1, 0.25, 0.50, 0.75, 0.9, 0.95, 1)) |>
+      mutate(d = parameter$d, v = parameter$v, sources = n_sources)
+  }) |> bind_rows()
+}) |> bind_rows()
+```
+
+# References