From 3cdb1c26011c299744438de1f2444a793e18c6de Mon Sep 17 00:00:00 2001
From: gmega
Date: Fri, 20 Oct 2023 13:48:01 -0300
Subject: [PATCH] add log helper scripts and snippets, update analysis

---
 .gitignore                                 |  1 +
 analysis/analysis.Rmd                      | 56 ++++++++++++++++++++++
 bin/csv-concat.sh                          |  8 ++++
 bin/pull-all-logs.sh                       | 29 +++++++++++
 bin/{pull_pod_logs.sh => pull-pod-logs.sh} | 11 +++--
 bin/snippets/README.md                     |  4 ++
 bin/snippets/upload-bug.sh                 | 26 ++++++++++
 7 files changed, 130 insertions(+), 5 deletions(-)
 create mode 100755 bin/csv-concat.sh
 create mode 100755 bin/pull-all-logs.sh
 rename bin/{pull_pod_logs.sh => pull-pod-logs.sh} (51%)
 create mode 100644 bin/snippets/README.md
 create mode 100644 bin/snippets/upload-bug.sh

diff --git a/.gitignore b/.gitignore
index 5537745..031f1c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ dist
 **/__pycache__
 .Rproj.user
 .RData
+.Rhistory
 *.html
 *.log
 *.csv
diff --git a/analysis/analysis.Rmd b/analysis/analysis.Rmd
index fc4b618..e406770 100644
--- a/analysis/analysis.Rmd
+++ b/analysis/analysis.Rmd
@@ -8,6 +8,8 @@ library(tidyverse)
 library(lubridate)
 ```
 
+# Node Crashing on Upload
+
 ```{r}
 uploads <- read_csv('./codex-continuous-tests-0codex3-5-77bdb95dc7-j7f46_codex3-5-uploads.csv')
 ```
@@ -150,3 +152,57 @@ ggplot(
   theme_minimal()
 ```
 
+
+# Whole-Cluster
+
+```{r}
+cluster_uploads <- read_csv('../data/20/pods/uploads/all_uploads.csv') |> filter(source != 'source')
+```
+
+```{r}
+cluster_upload_durations <- cluster_uploads |> group_by(source, upload) |> arrange(timestamp) |> summarise(duration = as.numeric(timestamp[n()] - timestamp[1]))
+```
+
+```{r fig.width=12}
+ggplot(cluster_upload_durations) +
+  geom_line(aes(x = upload, y = duration, col = source)) +
+  theme_minimal() +
+  facet_wrap(. ~ source) +
+  guides(color = FALSE)
+```
+
+```{r}
+cluster_interlog_intervals <- cluster_uploads |>
+  group_by(source, upload) |>
+  arrange(timestamp) |>
+  mutate(log_interval = as.numeric(timestamp - lag(timestamp))) |>
+  ungroup()
+```
+
+```{r fig.width=10}
+cluster_interlog_intervals |>
+  group_by(source, upload) |>
+  summarise(
+    mean_li = mean(log_interval, na.rm=TRUE),
+    median_li = median(log_interval, na.rm=TRUE),
+    max_li = max(log_interval, na.rm=TRUE),
+  ) |>
+  pivot_longer(-c(source, upload)) %>% {
+    ggplot(.) +
+      geom_line(aes(x = upload, y = value, col = name)) +
+      scale_y_log10() +
+      theme_minimal() +
+      ylab('interval between log messages (logscale, seconds)') +
+      facet_wrap(. ~ source)
+  }
+```
+
+
+```{r}
+ggplot(cluster_interlog_intervals) +
+  geom_line(aes(x = upload, y = log_interval, col = source)) +
+  theme_minimal() +
+  facet_wrap(. ~ source) +
+  guides(color = FALSE)
+```
+
diff --git a/bin/csv-concat.sh b/bin/csv-concat.sh
new file mode 100755
index 0000000..d4ae906
--- /dev/null
+++ b/bin/csv-concat.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Concatenates CSV files that have identical headers by removing the header from all but the first file. This is
+# meant to be used after a call to `cat`; e.g., cat csv1.csv csv2.csv | csv-concat.sh
+set -e
+
+IFS= read -r header # read (not head) so no bytes beyond the first line are consumed from the pipe
+echo "$header"
+grep -Fxv "$header"
\ No newline at end of file
diff --git a/bin/pull-all-logs.sh b/bin/pull-all-logs.sh
new file mode 100755
index 0000000..94f3d26
--- /dev/null
+++ b/bin/pull-all-logs.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Given a namespace and a base folder containing the runner logs for continuous tests, creates
+# a storage area (folder) and:
+#
+# 1. pulls pod logs into storage_area/pods
+# 2. copies runner logs to storage_area/runner
+#
+# Make sure you delete the original runner logs once this is done, as otherwise they might get copied into more
+# than one storage area.
+set -e
+
+namespace=${1}
+runner_log_source=${2}
+
+if [ -z "$namespace" ] || [ -z "$runner_log_source" ]; then
+  echo "Usage: bin/pull-all-logs.sh <namespace> <runner log folder>"
+  exit 1
+fi
+
+run_id=$(date +'%Y-%m-%d-%H%M%S')
+logs="data/logs/$run_id"
+pod_logs="$logs/pods"
+runner_logs="$logs/runner"
+
+mkdir -p "$pod_logs"
+bash bin/pull-pod-logs.sh "$namespace" "$pod_logs"
+
+mkdir -p "$runner_logs"
+cp -r "$runner_log_source"/. "$runner_logs"
\ No newline at end of file
diff --git a/bin/pull_pod_logs.sh b/bin/pull-pod-logs.sh
similarity index 51%
rename from bin/pull_pod_logs.sh
rename to bin/pull-pod-logs.sh
index 95546bc..f21d230 100755
--- a/bin/pull_pod_logs.sh
+++ b/bin/pull-pod-logs.sh
@@ -1,22 +1,23 @@
 #!/bin/bash
-NAMESPACE=${1:-"codex-continuous-tests"}
+namespace=${1:-"codex-continuous-tests"}
+output_folder=${2:-./}
 
 # List all pods in the namespace
-pods=$(kubectl get pods -n $NAMESPACE -o jsonpath='{.items[*].metadata.name}')
+pods=$(kubectl get pods -n "$namespace" -o jsonpath='{.items[*].metadata.name}')
 
 for pod in $pods; do
     echo "Fetching logs for $pod..."
 
     # Handle pods with multiple containers
-    containers=$(kubectl get pod $pod -n $NAMESPACE -o jsonpath='{.spec.containers[*].name}')
+    containers=$(kubectl get pod "$pod" -n "$namespace" -o jsonpath='{.spec.containers[*].name}')
 
     for container in $containers; do
         if [ "$container" == "$pod" ]; then
             # If there's only one container, name the log file after the pod
-            kubectl logs $pod -n $NAMESPACE > "${1}${pod}.log"
+            kubectl logs "$pod" -n "$namespace" > "${output_folder}/${pod}.log"
         else
             # If there are multiple containers, name the log file after the pod and container
-            kubectl logs $pod -c $container -n $NAMESPACE > "${1}${pod}_${container}.log"
+            kubectl logs "$pod" -c "$container" -n "$namespace" > "${output_folder}/${pod}_${container}.log"
         fi
     done
 done
diff --git a/bin/snippets/README.md b/bin/snippets/README.md
new file mode 100644
index 0000000..0aa9d74
--- /dev/null
+++ b/bin/snippets/README.md
@@ -0,0 +1,4 @@
+# Snippets
+
+Ad hoc snippets which reshape data for one-off analyses; not worth the trouble of turning into proper scripts.
+
diff --git a/bin/snippets/upload-bug.sh b/bin/snippets/upload-bug.sh
new file mode 100644
index 0000000..ffdfd72
--- /dev/null
+++ b/bin/snippets/upload-bug.sh
@@ -0,0 +1,26 @@
+set -e
+
+base_folder=${1:-"./data/20"}
+mkdir -p "${base_folder}/pods/uploads"
+
+# tags uploads with an id
+for i in "${base_folder}"/pods/codex-continuous-tests-0codex*; do
+  python -m adhoc.identify_uploads < "$i" > "${i%/*}/uploads/${i##*/}"
+done
+
+# transforms raw logs into a single CSV
+for i in "${base_folder}"/pods/uploads/codex-continuous-tests-0codex*; do
+  python -m logtools.cli.to_csv < "$i" \
+    --extract-fields upload \
+    --constant-column \
+    source="$(basename "${i%.*}")" >> "${base_folder}"/pods/uploads/all_uploads.csv.temp
+done
+
+./bin/csv-concat.sh < "${base_folder}"/pods/uploads/all_uploads.csv.temp > "${base_folder}"/pods/uploads/all_uploads.csv
+rm "${base_folder}"/pods/uploads/all_uploads.csv.temp
+
+# extracts debug endpoint data and looks into wantlist sizes
+grep -h 'Before upload\|After download' "${base_folder}"/runner/*.log | \
+  sed -En 's/\[(.{28})\] <([A-Z]+[0-9]+)> (Before upload|After download): (.*)$/\4/p' > "${base_folder}"/runner/merged.jsonl
+
+jq '.pendingBlocks' < "${base_folder}"/runner/merged.jsonl | uniq # should print 0
\ No newline at end of file
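
For reference, a usage sketch of the flow the two log-pulling scripts implement. The namespace is the default baked into pull-pod-logs.sh; `./runner-logs` is an illustrative path standing in for wherever the test runner wrote its logs:

```bash
# Pull pod logs and archive runner logs into a fresh storage area,
# data/logs/<timestamp>/{pods,runner}, as described in the pull-all-logs.sh header.
bash bin/pull-all-logs.sh codex-continuous-tests ./runner-logs

# Per the warning in the script header, delete the originals afterwards so a
# later run does not archive them a second time.
rm -r ./runner-logs/*
```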
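
And csv-concat.sh in action, with two made-up CSV chunks shaped like the snippet's output (a `source` column plus the extracted `upload` field):

```bash
# Two chunks sharing an identical header line.
printf 'source,upload\ncodex1,1\n' > part1.csv
printf 'source,upload\ncodex2,2\n' > part2.csv

# The script echoes the first header and drops every later line that is
# exactly equal to it.
cat part1.csv part2.csv | ./bin/csv-concat.sh
# source,upload
# codex1,1
# codex2,2
```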
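
Finally, a sketch of the runner-log extraction at the end of upload-bug.sh. The log line below is fabricated to fit the sed pattern, which only pins down the shape: 28 characters between square brackets, a node tag like `<CODEX1>`, the event name, then the JSON payload from the debug endpoint. The real runner timestamp format may differ:

```bash
line='[2023-10-20 13:48:01.1234567Z] <CODEX1> Before upload: {"pendingBlocks": 0}'

# Group 4 of the pattern is the JSON payload; -n plus the p flag prints only
# lines where the substitution actually matched.
echo "$line" | sed -En 's/\[(.{28})\] <([A-Z]+[0-9]+)> (Before upload|After download): (.*)$/\4/p'
# {"pendingBlocks": 0}
```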