mirror of
https://github.com/logos-storage/bittorrent-benchmarks.git
synced 2026-05-18 06:49:29 +00:00
extend analysis with time to first byte and transfer time
This commit is contained in:
parent
8a834ecfee
commit
74e6486d56
@ -2,3 +2,5 @@
|
||||
^renv\.lock$
|
||||
^.*\.Rproj$
|
||||
^\.Rproj\.user$
|
||||
^\.positai$
|
||||
^\.claude$
|
||||
|
||||
@ -94,13 +94,21 @@ compute_download_times <- function(meta, request_event, download_metric, group_i
|
||||
) |>
|
||||
group_by(node, run, seed_set) |>
|
||||
mutate(
|
||||
time_to_first_byte = min(timestamp),
|
||||
lookup_time = as.numeric(time_to_first_byte - seed_request_time)
|
||||
# The time elapsed between the instant we start the download and the time
|
||||
# we see the first log entry is approximated as "time to first byte".
|
||||
# In reality, this factors lookup time, swarm bootstrap time, and
|
||||
# part of the download itself.
|
||||
first_byte_t = min(timestamp),
|
||||
first_byte = as.numeric(first_byte_t - seed_request_time),
|
||||
# "Transfer" time is the total download time minus the time to first byte. Again,
|
||||
# this is approximated, and likely reflects a shorter download time than
|
||||
# the real download time.
|
||||
transfer = as.numeric(max(timestamp) - first_byte_t)
|
||||
) |>
|
||||
ungroup()
|
||||
|
||||
if (nrow(download_times |>
|
||||
filter(elapsed_download_time < 0 | lookup_time < 0)) > 0) {
|
||||
filter(elapsed_download_time < 0 | first_byte < 0 | transfer < 0)) > 0) {
|
||||
stop('Calculation for download times contains negative numbers')
|
||||
}
|
||||
|
||||
@ -137,36 +145,49 @@ download_times <- function(experiment, piece_count_distinct, discard_incomplete
|
||||
|
||||
|
||||
completion_time_stats <- function(download_times, meta) {
|
||||
completion_times <- download_times |>
|
||||
filtered <- download_times |>
|
||||
filter(!is.na(elapsed_download_time),
|
||||
is_completed(completed)) |>
|
||||
pull(elapsed_download_time)
|
||||
is_completed(completed))
|
||||
|
||||
n_experiments <- meta$repetitions * meta$seeder_sets
|
||||
n_leechers <- meta$nodes$network_size - meta$seeders
|
||||
n_points <- n_experiments * n_leechers
|
||||
|
||||
tibble(
|
||||
n = length(completion_times),
|
||||
n = nrow(filtered),
|
||||
expected_n = n_points,
|
||||
missing = expected_n - n,
|
||||
min = min(completion_times),
|
||||
p05 = quantile(completion_times, p = 0.05),
|
||||
p10 = quantile(completion_times, p = 0.10),
|
||||
p20 = quantile(completion_times, p = 0.20),
|
||||
p25 = quantile(completion_times, p = 0.25),
|
||||
median = median(completion_times),
|
||||
p75 = quantile(completion_times, p = 0.75),
|
||||
p80 = quantile(completion_times, p = 0.80),
|
||||
p90 = quantile(completion_times, p = 0.90),
|
||||
p95 = quantile(completion_times, p = 0.95),
|
||||
max = max(completion_times),
|
||||
completion = distributional_stats(filtered$elapsed_download_time),
|
||||
first_byte = distributional_stats(filtered$first_byte),
|
||||
transfer = distributional_stats(filtered$transfer)
|
||||
) |>
|
||||
unnest(
|
||||
cols = c(completion, first_byte, transfer),
|
||||
names_sep = '_'
|
||||
)
|
||||
}
|
||||
|
||||
#' Summarise the distribution of a numeric vector.
#'
#' @param x A numeric vector of observations.
#' @return A one-row tibble with the minimum (`min`), maximum (`max`), median
#'   (`median`), selected percentiles (`p05`, `p10`, `p20`, `p25`, `p75`,
#'   `p80`, `p90`, `p95`), the interquartile range (`iqr`), and an approximate
#'   confidence half-width around the median (`ci`) together with the
#'   resulting upper and lower bounds (`w_top`, `w_bottom`).
distributional_stats <- function(x) {
  n <- length(x)

  # Spell out `probs` instead of relying on partial matching of `p`, and drop
  # the "5%"-style names so every column is a plain, unnamed numeric.
  pct <- function(prob) quantile(x, probs = prob, names = FALSE)

  tibble(
    min = min(x),
    p05 = pct(0.05),
    p10 = pct(0.10),
    p20 = pct(0.20),
    p25 = pct(0.25),
    median = median(x),
    p75 = pct(0.75),
    p80 = pct(0.80),
    p90 = pct(0.90),
    p95 = pct(0.95),
    max = max(x),
    iqr = p75 - p25,
    # This gives us roughly a 95% ci for comparing medians.
    ci = (1.58 * iqr) / sqrt(n),
    w_top = median + ci,
    w_bottom = median - ci
  )
}
|
||||
|
||||
check_seeder_count <- function(download_times, seeders) {
|
||||
@ -212,13 +233,12 @@ compute_speedups <- function(benchmarks, baseline, compare) {
|
||||
baseline_data <- benchmarks |>
|
||||
filter(label == baseline) |>
|
||||
select(
|
||||
experiment_type, label, network_size, seeders, leechers, file_size, median
|
||||
experiment_type, label, network_size, seeders, leechers, file_size, completion_median
|
||||
) |>
|
||||
rename(baseline_median = median)
|
||||
rename(baseline_median = completion_median)
|
||||
|
||||
|
||||
lapply(compare, function(compare_label) {
|
||||
browser()
|
||||
benchmarks |>
|
||||
filter(label == compare_label) |>
|
||||
inner_join(
|
||||
@ -226,7 +246,7 @@ compute_speedups <- function(benchmarks, baseline, compare) {
|
||||
by = c('network_size', 'seeders', 'leechers', 'file_size')
|
||||
) |>
|
||||
mutate(
|
||||
relative_median = median / baseline_median
|
||||
relative_median = completion_median / baseline_median
|
||||
) |>
|
||||
mutate(label = label.x) |>
|
||||
select(-baseline_median, -label.y, -label.x)
|
||||
|
||||
@ -31,9 +31,10 @@ devtools::load_all()
|
||||
```
|
||||
|
||||
```{r message = FALSE, include = !knitr::is_html_output()}
|
||||
experiments <- read_all_experiments('./data/devnet/optimized/g1761924045/', label = 'deluge') |>
|
||||
merge_experiments(read_all_experiments('./data/devnet/optimized/g1762505060/', label ='codex-baseline')) |>
|
||||
merge_experiments(read_all_experiments('./data/devnet/optimized/g1761729711/', label = 'codex-optimized'))
|
||||
experiments <- read_all_experiments('./data/do/g1761924045/', label = 'deluge') |>
|
||||
merge_experiments(read_all_experiments('./data/do/g1762505060/', label = 'codex-baseline')) |>
|
||||
merge_experiments(read_all_experiments('./data/do/g1761729711/', label = 'codex-optimized')) |>
|
||||
merge_experiments(read_all_experiments('./data/do/g1775565300/', label = 'new-protocol'))
|
||||
```
|
||||
|
||||
```{r include = !knitr::is_html_output()}
|
||||
@ -45,7 +46,7 @@ COUNT_DISTINCT = list(
|
||||
|
||||
```{r message = FALSE, include = !knitr::is_html_output()}
|
||||
benchmarks <- lapply(experiments, function(experiment) {
|
||||
print(glue::glue('Process {experiment$experiment_id}'))
|
||||
print(glue::glue('Process {experiment$experiment_id} - {experiment$label}'))
|
||||
download_time_stats <- tryCatch({
|
||||
meta <- experiment$meta
|
||||
completion <- experiment |>
|
||||
@ -77,9 +78,12 @@ benchmarks <- lapply(experiments, function(experiment) {
|
||||
levels = rlang::parse_bytes(as.character(
|
||||
unique(file_size[order(file_size, decreasing = TRUE)])))),
|
||||
seeder_ratio = seeders / network_size,
|
||||
median_speed = file_size_bytes / median,
|
||||
p25_speed = file_size_bytes / p25,
|
||||
p75_speed = file_size_bytes / p75
|
||||
completion_median_speed = file_size_bytes / completion_median,
|
||||
completion_p25_speed = file_size_bytes / completion_p25,
|
||||
completion_p75_speed = file_size_bytes / completion_p75,
|
||||
transfer_median_speed = file_size_bytes / transfer_median,
|
||||
transfer_p25_speed = file_size_bytes / transfer_p25,
|
||||
transfer_p75_speed = file_size_bytes / transfer_p75
|
||||
) |>
|
||||
relocate(file_size, network_size, seeders, leechers, file_size_bytes)
|
||||
```
|
||||
@ -114,58 +118,62 @@ DT::datatable(
|
||||
relative_performance <- compute_speedups(
|
||||
benchmarks = benchmarks,
|
||||
base = 'deluge',
|
||||
compare = c('codex-baseline', 'codex-optimized')
|
||||
compare = c('codex-baseline', 'codex-optimized', 'new-protocol')
|
||||
)
|
||||
```
|
||||
|
||||
## Median Download Speed
|
||||
|
||||
```{r fig.cap='Median download speed for Deluge and Codex', fig.width = 11, message = FALSE, echo = FALSE}
|
||||
ggplot(benchmarks, aes(col = label, fill = label, group = label)) +
|
||||
geom_ribbon(aes(ymin = p25_speed, ymax = p75_speed, x = network_size, fill = label, alpha = 0.5), col = 'lightgray') +
|
||||
geom_point(aes(x = network_size, y = p25_speed), col = 'darkgray', size=10.0, shape='-') +
|
||||
geom_point(aes(x = network_size, y = p75_speed), col = 'darkgray', size=10.0, shape='-') +
|
||||
geom_line(aes(x = network_size, y = median_speed)) +
|
||||
geom_point(aes(x = network_size, y = median_speed)) +
|
||||
ylab('median download speed (bytes/second)') +
|
||||
xlab('network size') +
|
||||
theme_minimal(base_size=15) +
|
||||
scale_y_continuous(labels = function(x) paste0(scales::label_bytes()(x), '/s')) +
|
||||
facet_grid(
|
||||
file_size ~ seeder_ratio,
|
||||
labeller = labeller(
|
||||
seeder_ratio = as_labeller(function(x) {
|
||||
paste0("seeder ratio: ", scales::percent(as.numeric(x)))
|
||||
}))
|
||||
) +
|
||||
scale_color_discrete(name = '') +
|
||||
guides(fill = 'none', alpha = 'none')
|
||||
comparison_plot(
|
||||
benchmarks,
|
||||
completion_p25_speed,
|
||||
completion_p75_speed,
|
||||
completion_median_speed,
|
||||
ylab = 'median download speed (bytes/second)',
|
||||
free_y = TRUE
|
||||
) + Y_BPS
|
||||
```
|
||||
|
||||
## Median Transfer Speed
|
||||
|
||||
```{r fig.width = 11, message = FALSE, echo = FALSE}
|
||||
comparison_plot(
|
||||
benchmarks,
|
||||
transfer_p25_speed,
|
||||
transfer_p75_speed,
|
||||
transfer_median_speed,
|
||||
ylab = 'median transfer speed (bytes/second)',
|
||||
free_y = TRUE
|
||||
) + Y_BPS
|
||||
```
|
||||
|
||||
## Median Download Time
|
||||
|
||||
|
||||
```{r fig.cap='Median time to download a whole file for Deluge and Codex', fig.width = 11, message = FALSE, echo = FALSE}
|
||||
ggplot(benchmarks, aes(col = label, fill = label, group = label)) +
|
||||
geom_ribbon(aes(ymin = p25, ymax = p75, x = network_size, fill = experiment_type, alpha = 0.5), col = 'lightgray') +
|
||||
geom_point(aes(x = network_size, y = p25), col = 'darkgray', size=10.0, shape='-') +
|
||||
geom_point(aes(x = network_size, y = p75), col = 'darkgray', size=10.0, shape='-') +
|
||||
geom_line(aes(x = network_size, y = median)) +
|
||||
geom_point(aes(x = network_size, y = median)) +
|
||||
ylab('median download time') +
|
||||
xlab('network size') +
|
||||
theme_minimal(base_size = 15) +
|
||||
scale_y_continuous(labels = scales::label_timespan()) +
|
||||
facet_grid(
|
||||
scales = 'free_y',
|
||||
file_size ~ seeder_ratio,
|
||||
labeller = labeller(
|
||||
seeder_ratio = as_labeller(function(x) {
|
||||
paste0("seeder ratio: ", scales::percent(as.numeric(x)))
|
||||
}))
|
||||
) +
|
||||
scale_color_discrete(name = '') +
|
||||
guides(fill = 'none', alpha = 'none')
|
||||
comparison_plot(
|
||||
benchmarks,
|
||||
completion_p25,
|
||||
completion_p75,
|
||||
completion_median,
|
||||
ylab = 'median download time',
|
||||
free_y = TRUE
|
||||
) + Y_TIMESPAN
|
||||
```
|
||||
|
||||
## Median Time to First Byte
|
||||
|
||||
We define time to first byte as the time elapsed between the moment we ask a node to download a file and the moment it logs having downloaded the first $x\%$ of the file, where $x$ is whatever the logging granularity is. This is an approximation which factors in _i)_ DHT lookup latency; _ii)_ swarm bootstrap latency; and _iii)_ a fraction, typically $1/100^{\text{th}}$, of the download time. This should impact smaller files more than it impacts larger files.
|
||||
|
||||
```{r fig.cap='Median time-to-first-byte for Deluge and Codex', fig.width = 11, message = FALSE, echo = FALSE}
# Plots the first_byte_* columns; the y-axis label names the plotted metric
# (time to first byte) rather than the total download time.
comparison_plot(
  benchmarks,
  first_byte_p25,
  first_byte_p75,
  first_byte_median,
  ylab = 'median time to first byte',
  free_y = TRUE
) + Y_TIMESPAN
```
|
||||
|
||||
## Median Download Time Ratio
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user