Mirror of https://github.com/logos-storage/bittorrent-benchmarks.git (synced 2026-01-03 21:43:09 +00:00)
feat: add multi-experiment analysis
parent 84bac4594d
commit 0acd2e3086
README.md (10 lines changed)
@@ -9,9 +9,9 @@ locally, however, using [Minikube](https://minikube.sigs.k8s.io/) (or Kind, or D
 
 ## Limits
 
-When running experiments locally in a Linux machine, you will bump onto a number of
-limitations. I have documented those here. I won't go into how to make those changes
-permanent within your system as there's significant variation across distributions.
+When running experiments locally on a Linux machine, you will likely need to adjust several
+of the default OS limits. I won't go into how to make those changes permanent within your
+system as there's significant variation across distributions.
 
 **ARP Cache.** The default size for the ARP cache is too small. You should bump it
 significantly, e.g.:
@@ -22,7 +22,7 @@ echo 8192 | sudo tee /proc/sys/net/ipv4/neigh/default/gc_thresh2
 echo 16384 | sudo tee /proc/sys/net/ipv4/neigh/default/gc_thresh3
 ```
 
-**Inotify.** Kubernetes seems to enjoy watching the filesystem, so
+**inotify.** Kubernetes seems to enjoy watching the filesystem, so
 you should increase inotify limits across the board:
 
 ```bash
@@ -31,7 +31,7 @@ sudo sysctl -w fs.inotify.max_queued_events=2099999999
 sudo sysctl -w fs.inotify.max_user_watches=2099999999
 ```
 
-**Kernel key retention service.* Kubernetes also places a large number of keys
+**Kernel key retention service.** Kubernetes also places a large number of keys
 within the kernel. Make sure you have enough room:
 
 ```bash
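# (The diff is truncated here. The commands below are an illustrative guess,
# assuming the README raises the kernel key retention limits via the standard
# kernel.keys sysctls; the values are not from the original file.)
sudo sysctl -w kernel.keys.maxkeys=65536
sudo sysctl -w kernel.keys.maxbytes=26214400
```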
analysis/final.analysis/.Rbuildignore (new file)
^renv$
^renv\.lock$
^.*\.Rproj$
^\.Rproj\.user$
analysis/final.analysis/.Rprofile (new file)
source("renv/activate.R")
analysis/final.analysis/.gitignore (vendored, new file)
data
analysis/final.analysis/DESCRIPTION (new file)
Package: final.analysis
Type: Package
Title: Multi-Experiment Analysis for the BitTorrent Benchmarks
Version: 0.1.0
Authors@R: c(
    person(
      "Jane", "Doe",
      email = "jane@example.com",
      role = c("aut", "cre")
    )
  )
Description: Reads and validates the results of multiple BitTorrent benchmark
    experiments, and computes download-time statistics across them.
License: What license is it under?
Encoding: UTF-8
LazyData: true
analysis/final.analysis/NAMESPACE (new file)
exportPattern("^[[:alpha:]]+")
analysis/final.analysis/R/analysis.R (new file)
extract_repetitions <- function(deluge_torrent_download) {
  deluge_torrent_download |>
    mutate(
      # Torrent names are assumed to encode the seed set and the run number,
      # e.g. 'dataset-<seed_set>...<run>'.
      temp = str_remove(torrent_name, '^dataset-'),
      seed_set = as.numeric(str_extract(temp, '^\\d+')),
      run = as.numeric(str_extract(temp, '\\d+$'))
    ) |>
    rename(piece = value) |>
    select(-temp, -name)
}

compute_pieces <- function(deluge_torrent_download, n_pieces) {
  deluge_torrent_download |>
    group_by(node, seed_set, run) |>
    arrange(timestamp) |>
    mutate(
      piece_count = seq_along(timestamp)
    ) |>
    ungroup() |>
    mutate(completed = piece_count / n_pieces)
}

check_incomplete_downloads <- function(deluge_torrent_download, n_pieces) {
  incomplete_downloads <- deluge_torrent_download |>
    group_by(node, seed_set, run) |>
    count() |>
    ungroup() |>
    filter(n != n_pieces)

  nrow(incomplete_downloads) == 0
}

check_mismatching_repetitions <- function(deluge_torrent_download, repetitions) {
  mismatching_repetitions <- deluge_torrent_download |>
    select(seed_set, node, run) |>
    distinct() |>
    group_by(seed_set, node) |>
    count() |>
    filter(n != repetitions)

  nrow(mismatching_repetitions) == 0
}

compute_download_times <- function(meta, request_event, deluge_torrent_download, group_id) {
  n_leechers <- meta$nodes$network_size - meta$seeders

  download_start <- request_event |>
    select(-request_id) |>
    filter(name == 'leech', type == 'RequestEventType.end') |>
    mutate(
      # We didn't log run and seed-set ids on the runner side, so they have to
      # be reconstructed from event order: leechers cycle fastest, then runs,
      # then seeder sets. Both ids are 0-based.
      run = rep(rep(
        0:(meta$repetitions - 1),
        each = n_leechers), times = meta$seeder_sets),
      seed_set = rep(
        0:(meta$seeder_sets - 1),
        each = n_leechers * meta$repetitions),
    ) |>
    transmute(node = destination, run, seed_set, seed_request_time = timestamp)

  download_times <- deluge_torrent_download |>
    # FIXME remove this once we fix the chart
    mutate(node = sub(pattern = glue::glue('-{group_id}$'), replacement = '', x = node)) |>
    left_join(download_start, by = c('node', 'run', 'seed_set')) |>
    mutate(
      elapsed_download_time = as.numeric(timestamp - seed_request_time)
    ) |>
    group_by(node, run, seed_set) |>
    mutate(lookup_time = as.numeric(min(timestamp) - seed_request_time)) |>
    ungroup()

  if (nrow(download_times |>
           filter(elapsed_download_time < 0 | lookup_time < 0)) > 0) {
    stop('Calculation for download times contains negative numbers')
  }

  download_times
}

check_seeder_count <- function(download_times, seeders) {
  # Seeders never issue a 'leech' request, so they are exactly the nodes left
  # with an undefined (NA) seed_request_time in every (seed_set, run) pair.
  mismatching_seeders <- download_times |>
    filter(is.na(seed_request_time)) |>
    select(node, seed_set, run) |>
    distinct() |>
    group_by(seed_set, run) |>
    count() |>
    filter(n != seeders)

  nrow(mismatching_seeders) == 0
}

download_time_stats <- function(download_times) {
  download_times |>
    filter(!is.na(elapsed_download_time)) |>
    group_by(piece_count, completed) |>
    summarise(
      mean = mean(elapsed_download_time),
      median = median(elapsed_download_time),
      max = max(elapsed_download_time),
      min = min(elapsed_download_time),
      p95 = quantile(elapsed_download_time, probs = 0.95),
      p05 = quantile(elapsed_download_time, probs = 0.05),
      .groups = 'drop'
    )
}
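For reference, a minimal sketch of the two trickier steps above, on hypothetical inputs: `extract_repetitions` assumes torrent names of the form `dataset-<seed_set>...<run>`, and `compute_download_times` reconstructs 0-based run and seed-set indices purely from event order. The torrent names and counts below are illustrative, not taken from a real run:

```r
library(tidyverse)

# extract_repetitions name parsing on made-up names:
tibble(torrent_name = c('dataset-2-run-0', 'dataset-2-run-1')) |>
  mutate(
    temp = str_remove(torrent_name, '^dataset-'),
    seed_set = as.numeric(str_extract(temp, '^\\d+')),
    run = as.numeric(str_extract(temp, '\\d+$'))
  )
# seed_set = 2 for both rows; run = 0 and 1.

# compute_download_times index reconstruction: with 2 leechers, 2 repetitions
# and 2 seeder sets, 'leech' end events are assumed to arrive leechers-first,
# runs next, seeder sets last:
n_leechers <- 2; repetitions <- 2; seeder_sets <- 2
rep(rep(0:(repetitions - 1), each = n_leechers), times = seeder_sets)
# 0 0 1 1 0 0 1 1
rep(0:(seeder_sets - 1), each = n_leechers * repetitions)
# 0 0 0 0 1 1 1 1
```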
analysis/final.analysis/R/read-all-experiments.R (new file)
read_all_experiments <- function(base_path, skip_incomplete = TRUE) {
  roots <- list.files(base_path,
                      include.dirs = TRUE, no.. = TRUE, full.names = TRUE)

  experiments <- lapply(roots, read_single_experiment)
  names(experiments) <- sapply(roots, basename)

  if (skip_incomplete) {
    # Validates that no experiment has missing data.
    key_sets <- lapply(experiments, function(e) sort(names(e))) |> unique()
    # Selects the largest key set, which is presumably the most complete one.
    key_set <- key_sets[[order(sapply(key_sets, length), decreasing = TRUE)[1]]]

    # Discards any experiment that doesn't have all keys.
    experiments <- lapply(experiments, function(experiment) {
      if (!all(key_set %in% names(experiment))) {
        warning(glue::glue('Experiment {experiment$experiment_id} is missing ',
                           'some keys and will be discarded.'))
        NULL
      } else {
        experiment
      }
    })
  }

  drop_nulls(experiments)
}
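A quick sketch of the key-set check, using plain lists in place of parsed experiments (all names below are hypothetical):

```r
e1 <- list(experiment_id = 'e1', meta = list(),
           deluge_torrent_download = 1, request_event = 2)
e2 <- list(experiment_id = 'e2', meta = list(),
           deluge_torrent_download = 1)  # missing request_event

key_sets <- lapply(list(e1, e2), function(e) sort(names(e))) |> unique()
key_set <- key_sets[[order(sapply(key_sets, length), decreasing = TRUE)[1]]]

all(key_set %in% names(e1))  # TRUE  -> kept
all(key_set %in% names(e2))  # FALSE -> discarded with a warning
```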
analysis/final.analysis/R/read-single-experiment.R (new file)
read_single_experiment <- function(experiment_folder) {
  # This is a structural assumption: the base folder for the experiment
  # corresponds to its ID.
  experiment_id <- basename(experiment_folder)
  print(glue::glue('Reading experiment {experiment_id}'))

  meta <- jsonlite::read_json(.lookup_experiment_config(experiment_folder))
  table_files <- list.files(path = experiment_folder, pattern = '\\.csv$')
  data <- lapply(table_files, function(table_file) {
    read_csv(
      file.path(experiment_folder, table_file),
      show_col_types = FALSE
    ) |>
      mutate(
        # Force the outer variable, in case a table already has this column.
        experiment_id = !!experiment_id
      )
  })

  # Tables are keyed by their file names, minus the extension.
  names(data) <- gsub('(\\..*)$', '', table_files)
  data$meta <- meta
  data$experiment_id <- experiment_id

  data
}

.lookup_experiment_config <- function(experiment_folder) {
  candidates <- list.files(path = experiment_folder,
                           pattern = '_experiment_config_log_entry\\.jsonl$')

  if (length(candidates) != 1) {
    stop(glue::glue(
      'Cannot establish the correct config file at {experiment_folder}.'))
  }

  file.path(experiment_folder, candidates)
}
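The reader's structural assumptions, sketched as one experiment folder. The experiment ID below is hypothetical; the CSV base names become the table keys (the Rmd later uses `deluge_torrent_download` and `request_event`), and exactly one `*_experiment_config_log_entry.jsonl` file may exist per folder:

```
data/deluge/
└── some-experiment-id/
    ├── deluge_torrent_download.csv
    ├── request_event.csv
    └── deluge_experiment_config_log_entry.jsonl
```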
analysis/final.analysis/R/utils.R (new file)
drop_nulls <- function(a_list) {
  # is.null() is not vectorized: test each element individually, otherwise the
  # subscript is a single FALSE and nothing would be dropped.
  a_list[!vapply(a_list, is.null, logical(1))]
}
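Usage sketch:

```r
drop_nulls(list(a = 1, b = NULL, c = 'x'))
# $a
# [1] 1
#
# $c
# [1] "x"
```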
analysis/final.analysis/renv.lock (new file)
{
  "R": {
    "Version": "4.3.3",
    "Repositories": [
      {
        "Name": "CRAN",
        "URL": "https://cloud.r-project.org"
      }
    ]
  },
  "Packages": {
    "renv": {
      "Package": "renv",
      "Version": "1.0.11",
      "Source": "Repository",
      "Repository": "CRAN",
      "Requirements": [
        "utils"
      ],
      "Hash": "47623f66b4e80b3b0587bc5d7b309888"
    }
  }
}
analysis/final.analysis/renv/.gitignore (vendored, new file)
library/
local/
cellar/
lock/
python/
sandbox/
staging/
analysis/final.analysis/renv/activate.R (new file, 1305 lines)
Diff suppressed because it is too large.
analysis/final.analysis/renv/settings.json (new file)
{
  "bioconductor.version": null,
  "external.libraries": [],
  "ignored.packages": [],
  "package.dependency.fields": [
    "Imports",
    "Depends",
    "LinkingTo"
  ],
  "ppm.enabled": null,
  "ppm.ignored.urls": [],
  "r.version": null,
  "snapshot.type": "explicit",
  "use.cache": true,
  "vcs.ignore.cellar": true,
  "vcs.ignore.library": true,
  "vcs.ignore.local": true,
  "vcs.manage.ignores": true
}
analysis/final.analysis/static-dissemination.Rmd (new file)
---
title: "Static Dissemination"
output: html_document
date: "2025-01-10"
---

```{r}
library(tidyverse)

devtools::load_all()
```

```{r}
group_id <- 'g1736505161'
```

```{r}
deluge <- read_all_experiments('./data/deluge')
```

The torrent piece size is set at torrent creation time by [torrentool](https://github.com/idlesign/torrentool/blob/5f37d6dcc304758bae46d01c63e5be0f0a348bfc/torrentool/torrent.py#L354).

```{r}
PIECE_SIZE <- 262144
```

```{r}
piece_count <- function(experiment) {
  # The last piece may be partial, so round up.
  ceiling(experiment$meta$file_size / PIECE_SIZE)
}
```

```{r}
compute_download_time_stats <- function(experiment) {
  meta <- experiment$meta
  pieces <- experiment |> piece_count()
  downloads <- experiment$deluge_torrent_download |>
    extract_repetitions() |>
    compute_pieces(pieces)

  if (!check_incomplete_downloads(downloads, pieces)) {
    warning(glue::glue('Discarding experiment {experiment$experiment_id} ',
                       'due to incomplete downloads.'))
    return(NULL)
  }

  if (!check_mismatching_repetitions(downloads, meta$repetitions)) {
    warning(glue::glue('Discarding experiment {experiment$experiment_id} ',
                       'due to mismatching repetitions.'))
    return(NULL)
  }

  download_times <- compute_download_times(
    meta,
    experiment$request_event,
    downloads,
    group_id
  )

  if (!check_seeder_count(download_times, meta$seeders)) {
    warning(glue::glue('Experiment {experiment$experiment_id}: undefined ',
                       'download times do not match the seeder count.'))
    return(NULL)
  }

  network_size <- meta$nodes$network_size

  download_times |>
    download_time_stats() |>
    mutate(
      network_size = network_size,
      seeders = meta$seeders,
      leechers = network_size - meta$seeders,
      file_size = meta$file_size
    )
}

compute_compact_summary <- function(download_ecdf) {
  lapply(c(0.05, 0.5, 0.95), function(p)
    download_ecdf |>
      filter(completed >= p) |>
      slice_min(completed)
  ) |>
    bind_rows() |>
    select(completed, network_size, file_size, seeders, leechers, median) |>
    pivot_wider(id_cols = c('file_size', 'network_size', 'seeders', 'leechers'),
                names_from = completed, values_from = median)
}
```
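A minimal sketch of what the compact summary produces, on hypothetical ECDF rows (the values below are illustrative, not real results):

```{r}
example_ecdf <- tibble(
  completed = c(0.04, 0.06, 0.50, 0.96),
  network_size = 8, file_size = 104857600, seeders = 1, leechers = 7,
  median = c(1.2, 1.5, 14.8, 31.0)
)
compute_compact_summary(example_ecdf)
# One row per configuration; the columns named `0.06`, `0.5` and `0.96` hold
# the median download time at the first completion level at or above each of
# the 5%, 50% and 95% cut-offs.
```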

```{r}
lapply(deluge, function(experiment) {
  print(glue::glue('Processing {experiment$experiment_id}'))
  download_time_stats <- tryCatch(compute_download_time_stats(experiment),
                                  error = function(e) NULL)
  if (is.null(download_time_stats)) {
    NULL
  } else {
    compute_compact_summary(download_time_stats)
  }
}) |>
  drop_nulls() |>
  bind_rows() |>
  arrange(file_size, network_size, seeders)
```