add log helper scripts and snippets, update analysis
commit 3cdb1c2601 (parent aef2dbc870)

@@ -4,6 +4,7 @@ dist
 **/__pycache__
 .Rproj.user
 .RData
+.Rhistory
 *.html
 *.log
 *.csv

@@ -8,6 +8,8 @@ library(tidyverse)
 library(lubridate)
 ```
 
+# Node Crashing on Upload
+
 ```{r}
 uploads <- read_csv('./codex-continuous-tests-0codex3-5-77bdb95dc7-j7f46_codex3-5-uploads.csv')
 ```

@@ -150,3 +152,57 @@ ggplot(
   theme_minimal()
 ```
 
+
+# Whole-Cluster
+
+```{r}
+cluster_uploads <- read_csv('../data/20/pods/uploads/all_uploads.csv') |> filter(source != 'source')
+```
+
+```{r}
+cluster_upload_durations <- cluster_uploads |> group_by(source, upload) |> arrange(timestamp) |> summarise(duration = as.numeric(timestamp[n()] - timestamp[1]))
+```
+
+```{r fig.width=12}
+ggplot(cluster_upload_durations) +
+  geom_line(aes(x = upload, y = duration, col = source)) +
+  theme_minimal() +
+  facet_wrap(. ~ source) +
+  guides(color = FALSE)
+```
+
+```{r}
+cluster_interlog_intervals <- cluster_uploads |>
+  group_by(source, upload) |>
+  arrange(timestamp) |>
+  mutate(log_interval = as.numeric(timestamp - lag(timestamp))) |>
+  ungroup()
+```
+
+```{r fig.width=10}
+cluster_interlog_intervals |>
+  group_by(source, upload) |>
+  summarise(
+    mean_li = mean(log_interval, na.rm=TRUE),
+    median_li = median(log_interval, na.rm=TRUE),
+    max_li = max(log_interval, na.rm=TRUE),
+  ) |>
+  pivot_longer(-c(source, upload)) %>% {
+    ggplot(.) +
+      geom_line(aes(x = upload, y = value, col = name)) +
+      scale_y_log10() +
+      theme_minimal() +
+      ylab('interval between log messages (logscale, seconds)') +
+      facet_wrap(. ~ source)
+  }
+```
+
+
+```{r}
+ggplot(cluster_interlog_intervals) +
+  geom_line(aes(x = upload, y = log_interval, col = source)) +
+  theme_minimal() +
+  facet_wrap(. ~ source) +
+  guides(color = FALSE)
+```

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Concatenates CSV files that have identical headers by removing the header from all but the first file. This is
+# meant to be used after a call to `cat`; e.g., cat csv1.csv csv2.csv | csv-concat.sh
+set -e
+
+header=$(head -n 1)
+echo "$header"
+grep "$header" -Fv

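A usage sketch (file names are illustrative): the script reads the concatenated CSVs from stdin, echoes the first line it sees as the single header, and `grep -Fv` drops every later copy of that header line.

```bash
# Merge per-pod CSVs that share one header into a single file (illustrative names).
cat pod1-uploads.csv pod2-uploads.csv | ./bin/csv-concat.sh > all_uploads.csv
```
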
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Given a namespace and a base folder containing the runner logs for continuous tests, creates
+# a storage area (folder) and:
+#
+# 1. pulls pod logs into storage_area/pods
+# 2. copies runner logs to storage_area/runner
+#
+# Make sure you delete the original runner logs once this is done, as otherwise they might get copied into more
+# than one storage area.
+set -e
+
+namespace=${1}
+runner_log_source=${2}
+
+if [ -z "$namespace" ] || [ -z "$runner_log_source" ]; then
+  echo "Usage: bin/process_logs.sh <namespace> <runner_log_source>"
+  exit 1
+fi
+
+run_id=$(date +'%Y-%m-%d-%H%M%S')
+logs="data/logs/$run_id"
+pod_logs="$logs/pods"
+runner_logs="$logs/runner"
+
+mkdir -p "$pod_logs"
+bash bin/pull_pod_logs.sh "$namespace" "$pod_logs"
+
+mkdir -p "$runner_logs"
+cp "$runner_log_source" "$runner_logs"

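A minimal invocation sketch, assuming the runner log is a single file (both arguments are illustrative):

```bash
# Creates data/logs/<run_id>/pods and data/logs/<run_id>/runner, then fills them in.
bash bin/process_logs.sh codex-continuous-tests ./continuous-tests-runner.log
```
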
@@ -1,22 +1,23 @@
 #!/bin/bash
 
-NAMESPACE=${1:-"codex-continuous-tests"}
+namespace=${1:-"codex-continuous-tests"}
+output_folder=${2:-./}
 
 # List all pods in the namespace
-pods=$(kubectl get pods -n $NAMESPACE -o jsonpath='{.items[*].metadata.name}')
+pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}')
 
 for pod in $pods; do
   echo "Fetching logs for $pod..."
 
   # Handle pods with multiple containers
-  containers=$(kubectl get pod $pod -n $NAMESPACE -o jsonpath='{.spec.containers[*].name}')
+  containers=$(kubectl get pod "$pod" -n "$namespace" -o jsonpath='{.spec.containers[*].name}')
   for container in $containers; do
     if [ "$container" == "$pod" ]; then
       # If there's only one container, name the log file after the pod
-      kubectl logs $pod -n $NAMESPACE > "${1}${pod}.log"
+      kubectl logs "$pod" -n "$namespace" > "${output_folder}/${pod}.log"
     else
       # If there are multiple containers, name the log file after the pod and container
-      kubectl logs $pod -c $container -n $NAMESPACE > "${1}${pod}_${container}.log"
+      kubectl logs "$pod" -c "$container" -n "$namespace" > "${output_folder}/${pod}_${container}.log"
     fi
   done
 done

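With the second positional argument added, a direct call looks roughly like this (the output folder is illustrative and must already exist, since the script only redirects into it):

```bash
# One log file per pod (or per pod_container) lands in the output folder.
mkdir -p ./data/logs/manual/pods
bash bin/pull_pod_logs.sh codex-continuous-tests ./data/logs/manual/pods
```
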
@@ -0,0 +1,4 @@
+# Snippets
+
+Ad hoc snippets which reshape data for one-off analysis, not worth the trouble of making into scripts.
+

@@ -0,0 +1,26 @@
+set -e
+
+base_folder=${1:-"./data/20"}
+mkdir -p "${base_folder}/pods/uploads"
+
+# tags uploads with id
+for i in "${base_folder}"/pods/codex-continuous-tests-0codex*; do
+  python -m adhoc.identify_uploads < "$i" > "${i%/*}/uploads/${i##*/}"
+done
+
+# transforms raw logs into single CSV
+for i in "${base_folder}"/pods/uploads/codex-continuous-tests-0codex*; do
+  python -m logtools.cli.to_csv < "$i" \
+    --extract-fields upload \
+    --constant-column \
+    source="$(basename "${i%.*}")" >> "${base_folder}"/pods/uploads/all_uploads.csv.temp
+done
+
+./bin/csv-concat.sh < "${base_folder}"/pods/uploads/all_uploads.csv.temp > "${base_folder}"/pods/uploads/all_uploads.csv
+rm "${base_folder}"/pods/uploads/all_uploads.csv.temp
+
+# extracts debug endpoint data and looks into wantlist sizes
+grep -h 'Before upload\|After download' "${base_folder}"/runner/*.log | \
+  sed -nE 's/\[(.{28})\] <([A-Z]+[0-9]+)> (Before upload|After download): (.*)$/\4/p' > "${base_folder}"/runner/merged.jsonl
+
+jq '.pendingBlocks' < "${base_folder}"/runner/merged.jsonl | uniq # should print 0

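The `source` column and the per-pod output paths above are built with shell parameter expansion; a small sketch of what each expansion yields for a sample path (the path itself is illustrative):

```bash
# Illustrative path; shows what the expansions used in the snippet evaluate to.
i="./data/20/pods/codex-continuous-tests-0codex3-5-abc_codex3-5.log"
echo "${i%/*}"         # ./data/20/pods                                      (strip the last path component)
echo "${i##*/}"        # codex-continuous-tests-0codex3-5-abc_codex3-5.log   (file name only)
basename "${i%.*}"     # codex-continuous-tests-0codex3-5-abc_codex3-5       (file name without extension, used as source)
```
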