209 lines
4.7 KiB
Plaintext
209 lines
4.7 KiB
Plaintext
---
|
|
title: "R Notebook"
|
|
output: html_notebook
|
|
---
|
|
|
|
```{r}
|
|
library(tidyverse)
|
|
library(lubridate)
|
|
```
|
|
|
|
# Node Crashing on Upload
|
|
|
|
```{r}
|
|
# Read the upload log CSV analysed in this section into `uploads`.
uploads <- read_csv(
  file = './codex-continuous-tests-0codex3-5-77bdb95dc7-j7f46_codex3-5-uploads.csv'
)
|
|
```
|
|
|
|
|
|
```{r}
|
|
# Per-upload time span. Rows are ordered by `count`, and the first and last
# timestamps of each upload (in that order) become its start and end.
# `duration` is computed afterwards on the summarised table.
durations <- uploads |>
  arrange(count) |>
  group_by(upload) |>
  summarise(start = first(timestamp), end = last(timestamp)) |>
  mutate(duration = end - start)
|
|
```
|
|
|
|
How long are uploads taking?
|
|
|
|
```{r}
|
|
# Upload duration against upload number: points joined by a line.
durations |>
  ggplot(aes(x = upload, y = duration)) +
  geom_point() +
  geom_line() +
  labs(x = 'upload number', y = 'upload duration') +
  theme_minimal()
|
|
```
|
|
Are all uploads completing?
|
|
|
|
```{r}
|
|
# For every upload, count the log rows whose message is exactly
# 'Got data from stream'; the count is reported in column `blocks`.
uploads |>
  filter(message == 'Got data from stream') |>
  group_by(upload) |>
  count(name = 'blocks')
|
|
```
|
|
|
|
Does the end of the upload coincide with the last chunk that gets stored?
|
|
|
|
```{r}
|
|
# Latest timestamp of a message containing 'Got data from stream' per upload,
# joined to that upload's start / end / duration for side-by-side comparison.
uploads |>
  filter(grepl('Got data from stream', message)) |>
  group_by(upload) |>
  summarise(last_store = max(timestamp)) |>
  inner_join(durations, by = 'upload')
|
|
```
|
|
|
|
```{r}
|
|
# Print the per-upload start, end and duration table.
durations
|
|
```
|
|
|
|
```{r}
|
|
# How often each message containing 'Exception' occurs, rarest first.
uploads |>
  filter(grepl('Exception', message)) |>
  group_by(message) |>
  count() |>
  arrange(n)
|
|
```
|
|
|
|
```{r}
|
|
# Message frequencies within upload 18, rarest first.
uploads |>
  filter(upload == 18) |>
  group_by(message) |>
  count() |>
  arrange(n)
|
|
```
|
|
|
|
```{r}
|
|
# Message frequencies within upload 17, rarest first.
uploads |>
  filter(upload == 17) |>
  group_by(message) |>
  count() |>
  arrange(n)
|
|
```
|
|
|
|
```{r}
|
|
# Character vector of messages that appear more than 100 times overall.
messages <- uploads |>
  group_by(message) |>
  count() |>
  filter(n > 100) |>
  pull(message)
|
|
```
|
|
|
|
|
|
```{r fig.height = 10}
|
|
# Count of each frequent message (those in `messages`) per upload, one point
# per (upload, message) pair. Only upload '18' is given an explicit colour
# (red) in the manual scale.
uploads |>
  filter(message %in% messages) |>
  group_by(upload, message) |>
  count() |>
  ggplot() +
  geom_point(aes(x = message, y = n, color = as.factor(upload))) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab('count') +
  scale_color_manual(values = c('18' = 'red'))
|
|
```
|
|
|
|
|
|
```{r}
|
|
# Time elapsed between each log row and the previous row of the same upload,
# in seconds (`log_interval`; NA for the first row of every upload).
#
# The units are fixed explicitly: plain `timestamp - lag(timestamp)` lets
# difftime choose its units automatically, and inside a grouped mutate that
# choice is made per group, so as.numeric() could silently mix seconds with
# minutes across uploads.
# NOTE(review): assumes `timestamp` was parsed as a date-time -- confirm.
interlog_intervals <- uploads |>
  group_by(upload) |>
  arrange(timestamp) |>
  mutate(
    log_interval = as.numeric(
      difftime(timestamp, lag(timestamp), units = 'secs')
    )
  ) |>
  ungroup()
|
|
```
|
|
|
|
|
|
```{r}
|
|
# Mean, median and maximum gap between consecutive log rows for each upload,
# reshaped to long form and drawn as one line per statistic on a log10 y axis.
interlog_intervals |>
  group_by(upload) |>
  summarise(
    mean_li = mean(log_interval, na.rm = TRUE),
    median_li = median(log_interval, na.rm = TRUE),
    max_li = max(log_interval, na.rm = TRUE)
  ) |>
  pivot_longer(-upload) |>
  ggplot() +
  geom_line(aes(x = upload, y = value, col = name)) +
  scale_y_log10() +
  theme_minimal() +
  ylab('duration (logscale, seconds)')
|
|
```
|
|
|
|
```{r}
|
|
# Number of log rows per upload, largest first.
interlog_intervals |>
  group_by(upload) |>
  count() |>
  arrange(desc(n))
|
|
```
|
|
|
|
|
|
```{r fig.height=5}
|
|
# Sort rows by `log_interval`, number them within each upload (`rank`), and
# plot interval against rank, coloured by upload.
interlog_intervals |>
  group_by(upload) |>
  arrange(log_interval) |>
  mutate(rank = row_number()) |>
  ungroup() |>
  ggplot() +
  geom_point(aes(x = rank, y = log_interval, col = as.factor(upload))) +
  theme_minimal() +
  labs(x = 'rank', y = 'time between two consecutive log messages') +
  guides(col = guide_legend(title = 'upload #'))
|
|
```
|
|
|
|
```{r}
|
|
# Upload 18 only: every inter-log interval as a point, overlaid with the mean
# (red) and the 95th percentile (orange) of the intervals in each 5-second
# bucket of `timestamp`.
#
# Changes from the previous version:
# * the percentile column is named `p_95` to match `probs = 0.95`
#   (it used to be called `p_70`);
# * missing intervals are dropped with `na.rm = TRUE` rather than by removing
#   the first element of every bucket, which discarded a valid observation
#   from each bucket and still left the mean of the first bucket as NA.
interlog_intervals |>
  filter(upload == 18) |>
  mutate(bucket = floor_date(timestamp, unit = '5 seconds')) |>
  group_by(bucket) |>
  mutate(
    mean_interval = mean(log_interval, na.rm = TRUE),
    p_95 = quantile(log_interval, probs = 0.95, na.rm = TRUE)
  ) |>
  ungroup() |>
  ggplot() +
  geom_point(aes(x = timestamp, y = log_interval)) +
  geom_line(aes(x = bucket, y = mean_interval), col = 'red', lwd = 2) +
  geom_line(aes(x = bucket, y = p_95), col = 'orange', lwd = 2) +
  theme_minimal()
|
|
```
|
|
|
|
|
|
# Whole-Cluster
|
|
|
|
```{r}
|
|
# Read the combined uploads CSV and drop rows whose `source` value is the
# literal string 'source'.
# NOTE(review): presumably these are header lines repeated inside a
# concatenated file -- confirm against how all_uploads.csv is produced.
cluster_uploads <- read_csv(file = '../data/20/pods/uploads/all_uploads.csv') |>
  filter(source != 'source')
|
|
```
|
|
|
|
```{r}
|
|
# Duration of every (source, upload) pair in seconds: last timestamp minus
# first timestamp after ordering rows by time.
#
# Units are fixed with difftime(): a bare `-` between date-times selects the
# unit automatically per group, so as.numeric() could yield seconds for one
# upload and minutes for another. `.groups = 'drop'` returns an ungrouped
# table and silences the regrouping message from summarise().
# NOTE(review): assumes `timestamp` was parsed as a date-time -- confirm.
cluster_upload_durations <- cluster_uploads |>
  group_by(source, upload) |>
  arrange(timestamp) |>
  summarise(
    duration = as.numeric(
      difftime(timestamp[n()], timestamp[1], units = 'secs')
    ),
    .groups = 'drop'
  )
|
|
```
|
|
|
|
```{r fig.width=12}
|
|
# Upload duration per upload number, one facet per source. The colour legend
# is hidden because the facet strips already identify each source.
# `guides(color = 'none')` replaces the deprecated `guides(color = FALSE)`.
ggplot(cluster_upload_durations) +
  geom_line(aes(x = upload, y = duration, col = source)) +
  theme_minimal() +
  facet_wrap(. ~ source) +
  guides(color = 'none')
|
|
```
|
|
|
|
```{r}
|
|
# Seconds between each log row and the previous row of the same
# (source, upload) pair (`log_interval`; NA for the first row of each pair).
#
# difftime() with explicit units is used because `timestamp - lag(timestamp)`
# chooses its units automatically per group, which would let as.numeric()
# mix seconds and minutes between groups.
# NOTE(review): assumes `timestamp` was parsed as a date-time -- confirm.
cluster_interlog_intervals <- cluster_uploads |>
  group_by(source, upload) |>
  arrange(timestamp) |>
  mutate(
    log_interval = as.numeric(
      difftime(timestamp, lag(timestamp), units = 'secs')
    )
  ) |>
  ungroup()
|
|
```
|
|
|
|
```{r fig.width=10}
|
|
# Mean, median and maximum gap between consecutive log rows for every
# (source, upload) pair, in long form, drawn on a log10 y axis with one facet
# per source and one line per statistic.
cluster_interlog_intervals |>
  group_by(source, upload) |>
  summarise(
    mean_li = mean(log_interval, na.rm = TRUE),
    median_li = median(log_interval, na.rm = TRUE),
    max_li = max(log_interval, na.rm = TRUE)
  ) |>
  pivot_longer(-c(source, upload)) |>
  ggplot() +
  geom_line(aes(x = upload, y = value, col = name)) +
  scale_y_log10() +
  theme_minimal() +
  ylab('interval between log messages (logscale, seconds)') +
  facet_wrap(. ~ source)
|
|
```
|
|
|
|
|
|
```{r}
|
|
# Inter-log intervals per upload number, one facet per source.
#
# The y aesthetic was `duration`, a column that `cluster_interlog_intervals`
# as built in this notebook does not have; the column that table adds is
# `log_interval`, so that is plotted here.
# NOTE(review): if the per-upload duration plot was intended instead, use
# `cluster_upload_durations` with `y = duration` -- confirm.
# `guides(color = 'none')` replaces the deprecated `guides(color = FALSE)`.
ggplot(cluster_interlog_intervals) +
  geom_line(aes(x = upload, y = log_interval, col = source)) +
  theme_minimal() +
  facet_wrap(. ~ source) +
  guides(color = 'none')
|
|
```
|
|
|