diff --git a/analysis/final.analysis/R/analysis.R b/analysis/final.analysis/R/analysis.R
index 0a5bb0d..689e4af 100644
--- a/analysis/final.analysis/R/analysis.R
+++ b/analysis/final.analysis/R/analysis.R
@@ -27,7 +27,7 @@ compute_pieces <- function(deluge_torrent_download, n_pieces) {
 }
 
 check_incomplete_downloads <- function(deluge_torrent_download, n_pieces) {
-  incomplete_downloads <- downloads |>
+  incomplete_downloads <- deluge_torrent_download |>
     group_by(node, seed_set, run) |>
     count() |>
     ungroup() |>
@@ -52,7 +52,7 @@ compute_download_times <- function(meta, request_event, deluge_torrent_download,
 
   download_start <- request_event |>
     select(-request_id) |>
-    filter(name == 'leech', type == 'RequestEventType.end') |>
+    filter(name == 'leech', type == 'RequestEventType.start') |>
     mutate(
       # We didn't log those on the runner side so I have to reconstruct them.
       run = rep(rep(
@@ -65,14 +65,15 @@ compute_download_times <- function(meta, request_event, deluge_torrent_download,
     transmute(node = destination, run, seed_set, seed_request_time = timestamp)
 
   download_times <- deluge_torrent_download |>
-    # FIXME remove this once we fix the chart
-    mutate(node = sub(pattern = glue::glue('-{group_id}$'), replacement = '', x = node)) |>
     left_join(download_start, by = c('node', 'run', 'seed_set')) |>
     mutate(
       elapsed_download_time = as.numeric(timestamp - seed_request_time)
     ) |>
     group_by(node, run, seed_set) |>
-    mutate(lookup_time = as.numeric(min(timestamp) - seed_request_time)) |>
+    mutate(
+      time_to_first_byte = min(timestamp),
+      lookup_time = as.numeric(time_to_first_byte - seed_request_time)
+    ) |>
     ungroup()
 
   if (nrow(download_times |>
@@ -95,7 +96,7 @@ check_seeder_count <- function(download_times, seeders) {
   nrow(mismatching_seeders) == 0
 }
 
-download_time_stats <- function(download_times) {
+download_stats <- function(download_times) {
   download_times |>
     filter(!is.na(elapsed_download_time)) |>
     group_by(piece_count, completed) |>
@@ -110,7 +111,26 @@ download_time_stats <- function(download_times) {
   )
 }
 
-compute_download_time_stats <- function(experiment) {
+completion_time_stats <- function(download_times) {
+  completion_times <- download_times |>
+    filter(!is.na(elapsed_download_time)) |>
+    pull(elapsed_download_time)
+
+
+  tibble(
+    min = min(completion_times),
+    p05 = quantile(completion_times, p = 0.05),
+    p10 = quantile(completion_times, p = 0.10),
+    p20 = quantile(completion_times, p = 0.20),
+    median = median(completion_times),
+    p80 = quantile(completion_times, p = 0.80),
+    p90 = quantile(completion_times, p = 0.90),
+    p95 = quantile(completion_times, p = 0.95),
+    max = max(completion_times)
+  )
+}
+
+download_times <- function(experiment) {
   meta <- experiment$meta
   pieces <- experiment |> piece_count()
   downloads <- experiment$deluge_torrent_download |>
@@ -141,16 +161,7 @@ compute_download_time_stats <- function(experiment) {
     return(NULL)
   }
 
-  network_size <- meta$nodes$network_size
-
-  download_times |>
-    download_time_stats() |>
-    mutate(
-      network_size = network_size,
-      seeders = meta$seeders,
-      leechers = network_size - meta$seeders,
-      file_size = meta$file_size
-    )
+  download_times
 }
diff --git a/analysis/final.analysis/static-dissemination.Rmd b/analysis/final.analysis/static-dissemination.Rmd
index b063758..6de1b15 100644
--- a/analysis/final.analysis/static-dissemination.Rmd
+++ b/analysis/final.analysis/static-dissemination.Rmd
@@ -10,28 +10,53 @@ library(tidyverse)
 devtools::load_all()
 ```
 
-```{r}
-group_id <- 'g1736505161'
-```
-
-
 ```{r}
 deluge <-
   read_all_experiments('./data/deluge')
 ```
 
 ```{r}
-lapply(deluge, function(experiment) {
+benchmarks <- lapply(deluge, function(experiment) {
   print(glue::glue('Process {experiment$experiment_id}'))
-  download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) { print(e); NULL })
-  if (is.null(download_time_stats)) {
-    NULL
-  } else {
-    compute_compact_summary(download_time_stats)
-  }
+  download_time_stats <- tryCatch({
+    meta <- experiment$meta
+    completion <- experiment |>
+      download_times() |>
+      completion_time_stats()
+
+    if (is.null(completion)) {
+      NULL
+    } else {
+      completion |> mutate(
+        network_size = meta$nodes$network_size,
+        seeders = meta$seeders,
+        leechers = network_size - meta$seeders,
+        file_size = meta$file_size
+      )
+    }
+  }, error = function(e) { print(e); NULL })
 }) |>
   drop_nulls() |>
   bind_rows() |>
-  arrange(file_size, network_size, seeders, leechers)
+  arrange(file_size, network_size, seeders, leechers) |>
+  mutate(
+    file_size = as.character(rlang::parse_bytes(as.character(file_size))),
+    seeder_ratio = seeders/network_size
+  ) |>
+  relocate(file_size, network_size, seeders, leechers)
+```
+
+```{r}
+benchmarks
 ```
 
+```{r}
+ggplot(benchmarks |> filter(file_size == '104.86 MB')) +
+  geom_line(aes(x = network_size, y = median)) +
+  geom_point(aes(x = network_size, y = median)) +
+  ylab('median download time (seconds)') +
+  xlab('network size') +
+  theme_minimal() +
+  facet_wrap(seeder_ratio ~ file_size)
+```
+
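
For context, a minimal sketch of how the refactored helpers are meant to compose for a single experiment, assuming `deluge` is loaded with `read_all_experiments()` as in the notebook and that the metadata fields (`meta$nodes$network_size`, `meta$seeders`) are the ones referenced above; `deluge[[1]]` is an arbitrary experiment picked for illustration, not part of the change itself.

```r
library(tidyverse)
devtools::load_all()

# Sketch only: mirrors what the benchmarks chunk in static-dissemination.Rmd
# does inside lapply(), for one experiment.
experiment <- read_all_experiments('./data/deluge')[[1]]

experiment |>
  download_times() |>          # per-node elapsed_download_time, lookup_time, time_to_first_byte
  completion_time_stats() |>   # one row of min/percentile/max completion times
  mutate(
    network_size = experiment$meta$nodes$network_size,
    seeders = experiment$meta$seeders
  )
```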