---
title: "Analysis for Deluge Benchmarks - Static Network Dissemination Experiment"
output:
  bookdown::html_notebook2:
    number_sections: TRUE
    toc: TRUE
date: "2025-01-15"
---

This document contains the analysis for the Deluge benchmarks.

```{r message=FALSE}
library(tidyverse)

devtools::load_all()
```

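`devtools::load_all()` presumably loads the helper functions used throughout this notebook (`read_all_experiments`, `download_times`, `completion_time_stats`, `drop_nulls`) from the accompanying analysis package.
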
# Parse/Load Data

This is data that's been pre-parsed from an experiment [log source](https://github.com/codex-storage/bittorrent-benchmarks/blob/1ee8ea8a35a2c0fccea6e7c955183c4ed03eebb3/benchmarks/logging/sources.py#L27).

```{r}
experiments <- read_all_experiments('./data/g1739826980')
```
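
Each element of `experiments` is expected to expose at least an `experiment_id` and a `meta` record (experiment type, node counts, seeders, file size), since the processing code below relies on those fields. A quick structural peek at the first experiment is a cheap sanity check; this chunk is a sketch under that assumption:

```{r}
# Inspect the first experiment's top-level structure (field names such as
# `experiment_id` and `meta` are assumed from their use in the next chunks).
if (length(experiments) > 0) {
  str(experiments[[1]], max.level = 1)
}
```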

The `piece_count_distinct` flag passed to `download_times` below depends on the experiment type, so we keep it in a lookup table:

```{r}
COUNT_DISTINCT <- list(
  'codex_static_dissemination' = FALSE,
  'deluge_static_dissemination' = TRUE
)
```

Next, we compute the benchmark statistics from the raw download logs:

```{r}
# For each experiment, compute download-time statistics from the raw logs
# and annotate them with the experiment metadata.
benchmarks <- lapply(experiments, function(experiment) {
  print(glue::glue('Process {experiment$experiment_id}'))
  download_time_stats <- tryCatch({
    meta <- experiment$meta
    completion <- experiment |>
      download_times(
        piece_count_distinct = COUNT_DISTINCT[[meta$experiment_type]]) |>
      completion_time_stats(meta)

    if (is.null(completion)) {
      NULL
    } else {
      completion |> mutate(
        experiment_type = meta$experiment_type,
        network_size = meta$nodes$network_size,
        seeders = meta$seeders,
        leechers = network_size - meta$seeders,
        file_size = meta$file_size
      )
    }
  }, error = function(e) { print(e); NULL })
}) |>
  drop_nulls() |>
  bind_rows() |>
  arrange(file_size, network_size, seeders, leechers) |>
  mutate(
    # This factor conversion is horrible but needed so things are sorted
    # properly in the plot.
    file_size = factor(rlang::parse_bytes(as.character(file_size)),
                       levels = rlang::parse_bytes(as.character(
                         unique(file_size[order(file_size, decreasing = TRUE)])))),
    seeder_ratio = seeders / network_size
  ) |>
  relocate(file_size, network_size, seeders, leechers)
```
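
The plotting code in the next section relies on the summary columns `median`, `p25`, and `p75` produced by `completion_time_stats`. A quick assertion makes that dependency explicit (a sketch; the column names are taken from their use below):

```{r}
# Fail fast if the summary columns used by the plots are missing.
stopifnot(all(c('median', 'p25', 'p75') %in% colnames(benchmarks)))
```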

# Results

First, we present the raw data in tabular format:

```{r}
benchmarks
```

We then plot the median download time by network size, faceting by seeder ratio and file size, to check that the results look sane:

```{r fig.width = 10, warning=FALSE, message=FALSE}
ggplot(benchmarks, aes(col = experiment_type, fill = experiment_type)) +
  geom_ribbon(aes(ymin = p25, ymax = p75, x = network_size),
              fill = scales::alpha('blue', 0.5), col = 'lightgray') +
  geom_point(aes(x = network_size, y = p25), col = 'darkgray', size = 10.0, shape = '-') +
  geom_point(aes(x = network_size, y = p75), col = 'darkgray', size = 10.0, shape = '-') +
  geom_line(aes(x = network_size, y = median)) +
  geom_point(aes(x = network_size, y = median)) +
  ylab('median download time (seconds)') +
  xlab('network size') +
  theme_minimal(base_size = 15) +
  facet_grid(
    file_size ~ seeder_ratio,
    scales = 'free_y',
    labeller = labeller(
      file_size = as_labeller(function(x) x),
      seeder_ratio = as_labeller(function(x) {
        paste0("seeder ratio: ", scales::percent(as.numeric(x)))
      }))
  ) +
  scale_color_discrete(name = 'experiment type') +
  guides(fill = 'none') +
  ylim(c(0, NA))
```