feat: complete final analysis

Split compute_download_time_stats() into download_times() and
completion_time_stats(), rename download_time_stats() to
download_stats(), and move summary assembly and plotting into the
benchmarks notebook.

gmega 2025-01-14 19:35:20 -03:00
parent 8ef05e36a9
commit 742578a3f7
2 changed files with 66 additions and 30 deletions

View File

@@ -27,7 +27,7 @@ compute_pieces <- function(deluge_torrent_download, n_pieces) {
 }

 check_incomplete_downloads <- function(deluge_torrent_download, n_pieces) {
-  incomplete_downloads <- downloads |>
+  incomplete_downloads <- deluge_torrent_download |>
     group_by(node, seed_set, run) |>
     count() |>
     ungroup() |>
@@ -52,7 +52,7 @@ compute_download_times <- function(meta, request_event, deluge_torrent_download,
   download_start <- request_event |>
     select(-request_id) |>
-    filter(name == 'leech', type == 'RequestEventType.end') |>
+    filter(name == 'leech', type == 'RequestEventType.start') |>
     mutate(
       # We didn't log those on the runner side so I have to reconstruct them.
       run = rep(rep(
@@ -65,14 +65,15 @@ compute_download_times <- function(meta, request_event, deluge_torrent_download,
     transmute(node = destination, run, seed_set, seed_request_time = timestamp)

   download_times <- deluge_torrent_download |>
     # FIXME remove this once we fix the chart
     mutate(node = sub(pattern = glue::glue('-{group_id}$'), replacement = '', x = node)) |>
     left_join(download_start, by = c('node', 'run', 'seed_set')) |>
     mutate(
       elapsed_download_time = as.numeric(timestamp - seed_request_time)
     ) |>
     group_by(node, run, seed_set) |>
-    mutate(lookup_time = as.numeric(min(timestamp) - seed_request_time)) |>
+    mutate(
+      time_to_first_byte = min(timestamp),
+      lookup_time = as.numeric(time_to_first_byte - seed_request_time)
+    ) |>
     ungroup()

   if (nrow(download_times |>
@@ -95,7 +96,7 @@ check_seeder_count <- function(download_times, seeders) {
   nrow(mismatching_seeders) == 0
 }

-download_time_stats <- function(download_times) {
+download_stats <- function(download_times) {
   download_times |>
     filter(!is.na(elapsed_download_time)) |>
     group_by(piece_count, completed) |>
@@ -110,7 +111,26 @@ download_time_stats <- function(download_times) {
   )
 }

-compute_download_time_stats <- function(experiment) {
+completion_time_stats <- function(download_times) {
+  completion_times <- download_times |>
+    filter(!is.na(elapsed_download_time)) |>
+    pull(elapsed_download_time)
+  tibble(
+    min = min(completion_times),
+    p05 = quantile(completion_times, p = 0.05),
+    p10 = quantile(completion_times, p = 0.10),
+    p20 = quantile(completion_times, p = 0.20),
+    median = median(completion_times),
+    p80 = quantile(completion_times, p = 0.80),
+    p90 = quantile(completion_times, p = 0.90),
+    p95 = quantile(completion_times, p = 0.95),
+    max = max(completion_times)
+  )
+}
+
+download_times <- function(experiment) {
   meta <- experiment$meta
   pieces <- experiment |> piece_count()
   downloads <- experiment$deluge_torrent_download |>
@@ -141,16 +161,7 @@ compute_download_time_stats <- function(experiment) {
     return(NULL)
   }

-  network_size <- meta$nodes$network_size
-
-  download_times |>
-    download_time_stats() |>
-    mutate(
-      network_size = network_size,
-      seeders = meta$seeders,
-      leechers = network_size - meta$seeders,
-      file_size = meta$file_size
-    )
+  download_times
 }
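
Not part of the commit, but a quick way to sanity-check the new completion_time_stats() is to run it on a hypothetical toy input (assuming the package is loaded, e.g. via devtools::load_all(); the node names and timings below are made up):

```r
library(dplyr)

# Toy stand-in for the output of download_times(): one completion time
# per node, with one NA that the filter() inside the function drops.
toy_downloads <- tibble(
  node = paste0('node-', 1:6),
  elapsed_download_time = c(12.3, 15.1, NA, 18.4, 22.0, 30.7)
)

# Returns a one-row tibble with min/p05/.../max of the five non-NA times.
completion_time_stats(toy_downloads)
```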

View File

@@ -10,28 +10,53 @@ library(tidyverse)
 devtools::load_all()
 ```

+```{r}
+group_id <- 'g1736505161'
+```
+
 ```{r}
 deluge <- read_all_experiments('./data/deluge')
 ```

 ```{r}
-lapply(deluge, function(experiment) {
+benchmarks <- lapply(deluge, function(experiment) {
   print(glue::glue('Process {experiment$experiment_id}'))
-  download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) { print(e); NULL })
-  if (is.null(download_time_stats)) {
-    NULL
-  } else {
-    compute_compact_summary(download_time_stats)
-  }
+  download_time_stats <- tryCatch({
+    meta <- experiment$meta
+    completion <- experiment |>
+      download_times() |>
+      completion_time_stats()
+    if (is.null(completion)) {
+      NULL
+    } else {
+      completion |> mutate(
+        network_size = meta$nodes$network_size,
+        seeders = meta$seeders,
+        leechers = network_size - meta$seeders,
+        file_size = meta$file_size
+      )
+    }
+  }, error = function(e) { print(e); NULL })
 }) |>
   drop_nulls() |>
   bind_rows() |>
-  arrange(file_size, network_size, seeders, leechers)
+  arrange(file_size, network_size, seeders, leechers) |>
+  mutate(
+    file_size = as.character(rlang::parse_bytes(as.character(file_size))),
+    seeder_ratio = seeders/network_size
+  ) |>
+  relocate(file_size, network_size, seeders, leechers)
 ```
+
+```{r}
+benchmarks
+```
+
+```{r}
+ggplot(benchmarks |> filter(file_size == '104.86 MB')) +
+  geom_line(aes(x = network_size, y = median)) +
+  geom_point(aes(x = network_size, y = median)) +
+  ylab('median download time (seconds)') +
+  xlab('network size') +
+  theme_minimal() +
+  facet_wrap(seeder_ratio ~ file_size)
+```
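
One note on the pipeline above: drop_nulls() is used but not defined in this diff. A minimal sketch of what such a helper could look like (hypothetical; the package's actual definition may differ):

```r
# Hypothetical drop_nulls(): removes NULL entries (failed experiments)
# from a list before bind_rows() stacks the remaining tibbles.
drop_nulls <- function(xs) {
  Filter(Negate(is.null), xs)
}

drop_nulls(list(a = 1, b = NULL, c = 'x'))
#> $a
#> [1] 1
#>
#> $c
#> [1] "x"
```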