chore: move analysis code into analysis; properly discard experiments that error out

This commit is contained in:
gmega 2025-01-13 20:16:41 -03:00
parent 4cf3117390
commit 700753cf3d
No known key found for this signature in database
GPG Key ID: 6290D34EAD824B18
2 changed files with 64 additions and 74 deletions

View File

@@ -1,3 +1,9 @@
# Torrent piece size in bytes (fixed at torrent creation time by torrentool).
PIECE_SIZE <- 262144

# Number of torrent pieces making up the experiment's file.
#
# Piece counts are integral: a file whose size is not an exact multiple of
# PIECE_SIZE still occupies a whole final piece, hence ceiling() rather than
# plain division. For exact multiples this is identical to the old behavior.
piece_count <- function(experiment) {
  ceiling(experiment$meta$file_size / PIECE_SIZE)
}
extract_repetitions <- function(deluge_torrent_download) {
deluge_torrent_download |>
mutate(
@@ -104,3 +110,59 @@ download_time_stats <- function(download_times) {
)
}
# Summarise download times for a single experiment, annotated with the
# experiment's metadata (network size, seeders, leechers, file size).
#
# Returns NULL — after emitting a warning — whenever the experiment fails one
# of the sanity checks and must be discarded.
#
# NOTE(review): `group_id` is read from the enclosing environment rather than
# passed as an argument — confirm this is intentional and not a missing
# parameter.
compute_download_time_stats <- function(experiment) {
  meta <- experiment$meta
  total_pieces <- piece_count(experiment)

  per_repetition <- experiment$deluge_torrent_download |>
    extract_repetitions() |>
    compute_pieces(total_pieces)

  # Guard 1: every download must have fetched the full piece set.
  if (!check_incomplete_downloads(per_repetition, total_pieces)) {
    warning(glue::glue('Discard experiment {experiment$experiment_id} ',
                       'due to incomplete downloads'))
    return(NULL)
  }

  # Guard 2: observed repetitions must agree with the experiment metadata.
  if (!check_mismatching_repetitions(per_repetition, meta$repetitions)) {
    warning(glue::glue('Discard experiment {experiment$experiment_id} ',
                       'due to mismatching repetitions'))
    return(NULL)
  }

  times <- compute_download_times(
    meta,
    experiment$request_event,
    per_repetition,
    group_id
  )

  # Guard 3: undefined download times should line up with the seeder count.
  if (!check_seeder_count(times, meta$seeders)) {
    warning(glue::glue('Undefined download times do not match seeder count'))
    return(NULL)
  }

  n_nodes <- meta$nodes$network_size
  times |>
    download_time_stats() |>
    mutate(
      network_size = n_nodes,
      seeders = meta$seeders,
      leechers = n_nodes - meta$seeders,
      file_size = meta$file_size
    )
}
# Compact a download-time ECDF into a single wide row: for each target
# completion level (5%, 50%, 95%) keep the first ECDF point at or above that
# level, then spread the medians into one column per level.
compute_compact_summary <- function(download_ecdf) {
  level_rows <- lapply(
    c(0.05, 0.5, 0.95),
    \(target) download_ecdf |>
      filter(completed >= target) |>
      slice_min(completed)
  )

  level_rows |>
    bind_rows() |>
    select(completed, network_size, file_size, seeders, leechers, median) |>
    pivot_wider(
      id_cols = c('file_size', 'network_size', 'seeders', 'leechers'),
      names_from = completed,
      values_from = median
    )
}

View File

@@ -19,80 +19,10 @@ group_id <- 'g1736505161'
deluge <- read_all_experiments('./data/deluge')
```
The torrent piece size is set at torrent creation time by [torrentool](https://github.com/idlesign/torrentool/blob/5f37d6dcc304758bae46d01c63e5be0f0a348bfc/torrentool/torrent.py#L354).
```{r}
# Torrent piece size in bytes (matches torrentool's creation-time setting).
PIECE_SIZE <- 262144
```
```{r}
# Number of torrent pieces in the experiment's file.
# NOTE(review): this is only an integer when file_size is an exact multiple
# of PIECE_SIZE — otherwise it yields a fractional count; confirm inputs.
piece_count <- function(experiment) {
experiment$meta$file_size / PIECE_SIZE
}
```
```{r}
# Summarise download times for a single experiment, annotated with the
# experiment's metadata. Returns NULL (after a warning) when the experiment
# fails a sanity check and must be discarded.
# NOTE(review): `group_id` is read from the enclosing environment (defined
# earlier in this document) — confirm this is intentional.
compute_download_time_stats <- function(experiment) {
meta <- experiment$meta
pieces <- experiment |> piece_count()
downloads <- experiment$deluge_torrent_download |>
extract_repetitions() |>
compute_pieces(pieces)
# Guard: every download must have fetched the full piece set.
if (!check_incomplete_downloads(downloads, pieces)) {
warning(glue::glue('Discard experiment {experiment$experiment_id} ',
'due to incomplete downloads'))
return(NULL)
}
# Guard: observed repetitions must agree with the experiment metadata.
if (!check_mismatching_repetitions(downloads, meta$repetitions)) {
warning(glue::glue('Discard experiment {experiment$experiment_id} ',
'due to mismatching repetitions'))
return(NULL)
}
download_times <- compute_download_times(
meta,
experiment$request_event,
downloads,
group_id
)
# Guard: undefined download times should line up with the seeder count.
if (!check_seeder_count(download_times, meta$seeders)) {
warning(glue::glue('Undefined download times do not match seeder count'))
return(NULL)
}
network_size <- meta$nodes$network_size
# Attach experiment-level metadata columns to the summary statistics.
download_times |>
download_time_stats() |>
mutate(
network_size = network_size,
seeders = meta$seeders,
leechers = network_size - meta$seeders,
file_size = meta$file_size
)
}
# Compact a download-time ECDF into a single wide row: for each completion
# level (5%, 50%, 95%) keep the first ECDF point at or above that level,
# then spread the medians into one column per level.
compute_compact_summary <- function(download_ecdf) {
lapply(c(0.05, 0.5, 0.95), function(p)
download_ecdf |>
filter(completed >= p) |>
slice_min(completed)
) |>
bind_rows() |>
select(completed, network_size, file_size, seeders, leechers, median) |>
pivot_wider(id_cols = c('file_size', 'network_size', 'seeders', 'leechers'),
names_from = completed, values_from = median)
}
```
```{r}
lapply(deluge, function(experiment) {
print(glue::glue('Process {experiment$experiment_id}'))
download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) NULL)
download_time_stats <- tryCatch(compute_download_time_stats(experiment), error = function(e) { print(e); NULL })
if (is.null(download_time_stats)) {
NULL
} else {
@@ -101,9 +31,7 @@ lapply(deluge, function(experiment) {
}) |>
drop_nulls() |>
bind_rows() |>
arrange(seeders) |>
arrange(network_size) |>
arrange(file_size)
arrange(file_size, network_size, seeders, leechers)
```