From 700753cf3dd3d80505fd453248252fe7e611bd27 Mon Sep 17 00:00:00 2001 From: gmega Date: Mon, 13 Jan 2025 20:16:41 -0300 Subject: [PATCH] chore: move analysis code into analysis; properly discard experiments that error out --- analysis/final.analysis/R/analysis.R | 62 +++++++++++++++ .../final.analysis/static-dissemination.Rmd | 76 +------------------ 2 files changed, 64 insertions(+), 74 deletions(-) diff --git a/analysis/final.analysis/R/analysis.R b/analysis/final.analysis/R/analysis.R index e6da2e2..0a5bb0d 100644 --- a/analysis/final.analysis/R/analysis.R +++ b/analysis/final.analysis/R/analysis.R @@ -1,3 +1,9 @@ +PIECE_SIZE <- 262144 + +piece_count <- function(experiment) { + experiment$meta$file_size / PIECE_SIZE +} + extract_repetitions <- function(deluge_torrent_download) { deluge_torrent_download |> mutate( @@ -104,3 +110,59 @@ download_time_stats <- function(download_times) { ) } +compute_download_time_stats <- function(experiment) { + meta <- experiment$meta + pieces <- experiment |> piece_count() + downloads <- experiment$deluge_torrent_download |> + extract_repetitions() |> + compute_pieces(pieces) + + if (!check_incomplete_downloads(downloads, pieces)) { + warning(glue::glue('Discard experiment {experiment$experiment_id} ', + 'due to incomplete downloads')) + return(NULL) + } + + if (!check_mismatching_repetitions(downloads, meta$repetitions)) { + warning(glue::glue('Discard experiment {experiment$experiment_id} ', + 'due to mismatching repetitions')) + return(NULL) + } + + download_times <- compute_download_times( + meta, + experiment$request_event, + downloads, + group_id + ) + + if (!check_seeder_count(download_times, meta$seeders)) { + warning(glue::glue('Undefined download times do not match seeder count')) + return(NULL) + } + + network_size <- meta$nodes$network_size + + download_times |> + download_time_stats() |> + mutate( + network_size = network_size, + seeders = meta$seeders, + leechers = network_size - meta$seeders, + file_size = meta$file_size + ) +} + + +compute_compact_summary <- function(download_ecdf) { + lapply(c(0.05, 0.5, 0.95), function(p) + download_ecdf |> + filter(completed >= p) |> + slice_min(completed) + ) |> + bind_rows() |> + select(completed, network_size, file_size, seeders, leechers, median) |> + pivot_wider(id_cols = c('file_size', 'network_size', 'seeders', 'leechers'), + names_from = completed, values_from = median) +} + diff --git a/analysis/final.analysis/static-dissemination.Rmd b/analysis/final.analysis/static-dissemination.Rmd index 469dc5c..b063758 100644 --- a/analysis/final.analysis/static-dissemination.Rmd +++ b/analysis/final.analysis/static-dissemination.Rmd @@ -19,80 +19,10 @@ group_id <- 'g1736505161' deluge <- read_all_experiments('./data/deluge') ``` -The torrent piece size is set at torrent creation time by [torrentool](https://github.com/idlesign/torrentool/blob/5f37d6dcc304758bae46d01c63e5be0f0a348bfc/torrentool/torrent.py#L354). -```{r} -PIECE_SIZE <- 262144 -``` - -```{r} -piece_count <- function(experiment) { - experiment$meta$file_size / PIECE_SIZE -} -``` - -```{r} -compute_download_time_stats <- function(experiment) { - meta <- experiment$meta - pieces <- experiment |> piece_count() - downloads <- experiment$deluge_torrent_download |> - extract_repetitions() |> - compute_pieces(pieces) - - if (!check_incomplete_downloads(downloads, pieces)) { - warning(glue::glue('Discard experiment {experiment$experiment_id} ', - 'due to incomplete downloads')) - return(NULL) - } - - if (!check_mismatching_repetitions(downloads, meta$repetitions)) { - warning(glue::glue('Discard experiment {experiment$experiment_id} ', - 'due to mismatching repetitions')) - return(NULL) - } - - download_times <- compute_download_times( - meta, - experiment$request_event, - downloads, - group_id - ) - - if (!check_seeder_count(download_times, meta$seeders)) { - warning(glue::glue('Undefined download times do not match seeder count')) - return(NULL) - } - - network_size <- meta$nodes$network_size - - download_times |> - download_time_stats() |> - mutate( - network_size = network_size, - seeders = meta$seeders, - leechers = network_size - meta$seeders, - file_size = meta$file_size - ) -} - - -compute_compact_summary <- function(download_ecdf) { - lapply(c(0.05, 0.5, 0.95), function(p) - download_ecdf |> - filter(completed >= p) |> - slice_min(completed) - ) |> - bind_rows() |> - select(completed, network_size, file_size, seeders, leechers, median) |> - pivot_wider(id_cols = c('file_size', 'network_size', 'seeders', 'leechers'), - names_from = completed, values_from = median) -} - -``` - ```{r} lapply(deluge, function(experiment) { print(glue::glue('Process {experiment$experiment_id}')) - download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) NULL) + download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) { print(e); NULL }) if (is.null(download_time_stats)) { NULL } else { @@ -101,9 +31,7 @@ lapply(deluge, function(experiment) { }) |> drop_nulls() |> bind_rows() |> - arrange(seeders) |> - arrange(network_size) |> - arrange(file_size) + arrange(file_size, network_size, seeders, leechers) ```