---
title: "Analysis for Deluge Benchmarks - Static Network Dissemination Experiment"
output:
  bookdown::html_notebook2:
    number_sections: TRUE
    toc: TRUE
date: "2025-01-15"
---

This document contains the analysis for the Deluge benchmarks.

```{r message=FALSE}
library(tidyverse)
devtools::load_all()
```

# Parse/Load Data

This is data that's been pre-parsed from an experiment [log source](https://github.com/codex-storage/bittorrent-benchmarks/blob/1ee8ea8a35a2c0fccea6e7c955183c4ed03eebb3/benchmarks/logging/sources.py#L27).

```{r}
experiments <- read_all_experiments('./data/g1739826980')
```

```{r}
# Value passed as `piece_count_distinct` to download_times(), looked up by
# each experiment's `meta$experiment_type`.
# NOTE(review): an experiment type missing from this list yields NULL from
# `[[` -- confirm how download_times() treats that.
COUNT_DISTINCT <- list(
  'codex_static_dissemination' = FALSE,
  'deluge_static_dissemination' = TRUE
)
```

Computes the benchmark statistics from raw download logs.

```{r}
# Computes completion-time statistics for every experiment and stacks them
# into a single table, one experiment's rows after another. Each row is
# tagged with the experiment's type, network size, seeder/leecher counts and
# file size taken from its metadata.
benchmarks <- lapply(experiments, function(experiment) {
  print(glue::glue('Process {experiment$experiment_id}'))
  tryCatch({
    meta <- experiment$meta
    completion <- experiment |>
      download_times(
        piece_count_distinct = COUNT_DISTINCT[[meta$experiment_type]]
      ) |>
      completion_time_stats(meta)

    if (is.null(completion)) {
      NULL
    } else {
      completion |>
        mutate(
          experiment_type = meta$experiment_type,
          network_size = meta$nodes$network_size,
          seeders = meta$seeders,
          leechers = network_size - meta$seeders,
          file_size = meta$file_size
        )
    }
  }, error = function(e) {
    # Best effort: report which experiment failed and carry on with the
    # remaining ones instead of aborting the whole notebook.
    message(glue::glue(
      'Failed to process {experiment$experiment_id}: {conditionMessage(e)}'
    ))
    NULL
  })
}) |>
  drop_nulls() |>
  bind_rows() |>
  arrange(file_size, network_size, seeders, leechers) |>
  mutate(
    # This factor conversion is horrible but needed so things are sorted
    # properly in the plot: the levels are the distinct file sizes in
    # decreasing order, rendered as human-readable byte strings.
    file_size = factor(
      rlang::parse_bytes(as.character(file_size)),
      levels = rlang::parse_bytes(as.character(
        unique(file_size[order(file_size, decreasing = TRUE)])
      ))
    ),
    seeder_ratio = seeders / network_size
  ) |>
  relocate(file_size, network_size, seeders, leechers)
```

# Results

First, we present the raw data in tabular format:

```{r}
benchmarks
```

We then plot the median by network size, and facet it by seeder ratio and file size to see if it looks sane:

```{r fig.width = 10, warning=FALSE, message=FALSE}
# Median download time against network size, with the p25-p75 band drawn as a
# ribbon; one panel per (file size, seeder ratio) combination.
ggplot(
  benchmarks,
  aes(x = network_size, col = experiment_type, fill = experiment_type)
) +
  geom_ribbon(
    aes(ymin = p25, ymax = p75),
    fill = scales::alpha('blue', 0.5),
    col = 'lightgray'
  ) +
  # Dash-shaped markers at the lower and upper edges of the band.
  geom_point(aes(y = p25), col = 'darkgray', size = 10.0, shape = '-') +
  geom_point(aes(y = p75), col = 'darkgray', size = 10.0, shape = '-') +
  geom_line(aes(y = median)) +
  geom_point(aes(y = median)) +
  ylab('median download time (seconds)') +
  xlab('network size') +
  theme_minimal(base_size = 15) +
  facet_grid(
    file_size ~ seeder_ratio,
    scales = 'free_y',
    labeller = labeller(
      file_size = as_labeller(function(x) x),
      seeder_ratio = as_labeller(function(x) {
        paste0("seeder ratio: ", scales::percent(as.numeric(x)))
      })
    )
  ) +
  scale_color_discrete(name = 'experiment type') +
  guides(fill = 'none') +
  ylim(c(0, NA))
```