feat: generalize speedup analysis to multiple groups

This commit is contained in:
gmega 2025-11-10 11:02:29 -03:00
parent d3a1bef683
commit 897fd926ad
No known key found for this signature in database
GPG Key ID: 6290D34EAD824B18
2 changed files with 35 additions and 22 deletions

View File

@ -208,3 +208,28 @@ compute_compact_summary <- function(download_ecdf) {
names_from = completed, values_from = median)
}
#' Compute median download times relative to a baseline group
#'
#' For every label in `compare`, pairs each of its rows in `benchmarks` with
#' the `baseline` row that has the same `network_size`, `seeders`, `leechers`
#' and `file_size`, and divides the compared median by the baseline median.
#'
#' @param benchmarks Data frame with at least the columns `experiment_type`,
#'   `label`, `network_size`, `seeders`, `leechers`, `file_size` and `median`.
#' @param baseline Value of `label` that identifies the reference group.
#' @param compare Vector of `label` values to compare against the baseline.
#' @return A single data frame with the rows of all compared groups stacked.
#'   `label` holds the compared group's label and `relative_median` holds
#'   `median / baseline_median`. Rows without a matching baseline
#'   configuration are dropped by the inner join.
compute_speedups <- function(benchmarks, baseline, compare) {
  baseline_data <- benchmarks |>
    filter(label == baseline) |>
    select(
      experiment_type, label, network_size, seeders, leechers, file_size, median
    ) |>
    rename(baseline_median = median)

  lapply(compare, function(compare_label) {
    benchmarks |>
      filter(label == compare_label) |>
      inner_join(
        baseline_data,
        by = c('network_size', 'seeders', 'leechers', 'file_size')
      ) |>
      # `label` exists on both sides of the join and is not a key, so it comes
      # back suffixed; keep the compared group's label (`.x`) under the plain
      # name and drop the helpers.
      # NOTE(review): `experiment_type` is suffixed the same way and is left in
      # the output as `experiment_type.x` / `experiment_type.y` -- confirm
      # whether downstream code wants a single `experiment_type` column.
      mutate(
        relative_median = median / baseline_median,
        label = label.x
      ) |>
      select(-baseline_median, -label.y, -label.x)
  }) |>
    bind_rows()
}

View File

@ -111,23 +111,11 @@ DT::datatable(
```
```{r echo = FALSE}
# Codex-to-Deluge median ratio for every configuration (file size, network
# size, seeders, leechers) that appears in both experiments. Built inside
# local() so the two intermediate tables do not leak into the report's
# environment.
relative_performance <- local({
  deluge_medians <- transmute(
    filter(benchmarks, experiment_type == 'deluge_experiment_config_log_entry'),
    file_size, network_size, seeders, leechers, deluge_median = median
  )

  codex_medians <- select(
    filter(benchmarks, experiment_type == 'codex_experiment_config_log_entry'),
    file_size, network_size, seeders, leechers, codex_median = median
  )

  paired <- inner_join(
    deluge_medians,
    codex_medians,
    by = c('file_size', 'network_size', 'seeders', 'leechers')
  )

  mutate(
    paired,
    performance = codex_median / deluge_median,
    seeder_ratio = seeders / network_size
  )
})
# Median download time of each Codex variant relative to the 'deluge' group.
# NOTE(review): the plot further down facets on `seeder_ratio`, which this call
# does not add -- confirm `benchmarks` already carries that column.
relative_performance <- compute_speedups(
  benchmarks = benchmarks,
  # Spell the parameter name out in full; `base` only worked through R's
  # partial argument matching.
  baseline = 'deluge',
  compare = c('codex-baseline', 'codex-optimized')
)
```
## Median Download Speed
@ -186,15 +174,15 @@ Let $t_d$ and $t_c$ be the median times that Deluge and Codex, respectively, tak
When $m < 1$, Codex is faster than Deluge. Otherwise, Codex takes $m$ times as long as Deluge to download the same file.
```{r fig.cap='Median download time ratio for Codex and Deluge', fig.width = 11, message = FALSE, echo = FALSE}
ggplot(relative_performance) +
geom_line(aes(x = network_size, y = performance, col = file_size), lwd=1) +
ggplot(relative_performance, aes(col = label, group = label)) +
geom_line(aes(x = network_size, y = relative_median, col = label), lwd=1) +
geom_hline(yintercept = 1, linetype = 'dashed', col = 'darkgray') +
geom_point(aes(x = network_size, y = performance, col = file_size)) +
ylab('median Codex/Deluge performance ratio') +
geom_point(aes(x = network_size, y = relative_median, col = label)) +
ylab('median speedup/slowdown over Deluge') +
annotate('text', label = 'faster', x = 29, y = 0, col = 'darkgreen') +
annotate('text', label = 'slower', x = 28.5, y = 2, col = 'darkred') +
theme_minimal(base_size=15) +
scale_color_discrete(name = 'file size') +
scale_color_discrete(name = '') +
facet_grid(
file_size ~ seeder_ratio,
labeller = labeller(