Add pr-statistics script to get some stats on open or already merged PRs

Signed-off-by: Oskar Thoren <ot@oskarthoren.com>
This commit is contained in:
Oskar Thoren 2018-01-05 16:53:39 +09:00
parent 413f9c2fad
commit 2dd25646ca
No known key found for this signature in database
GPG Key ID: 5128AB0637CD85AF

263
scripts/pr-statistics.sh Executable file
View File

@ -0,0 +1,263 @@
#!/usr/bin/env bash
# Scans a GitHub repository and computes statistics on its pull requests (PRs).
# Requires curl and bc (present on most Unix-like systems).
# PARAMETERS ----------------------------
# Repository to analyse, written as "owner/name".
GITHUB_REPO='status-im/status-react'
# Which PRs are measured:
#   open  : PRs that are still open (neither merged nor closed); the
#           five-number summary covers the time elapsed since they were
#           opened. This is the default.
#   merge : PRs that were merged and closed; the five-number summary covers
#           the time spent before each PR was merged.
MODE='open'
# Optional creation-date window, formatted YYYY-MM-DD.
# Only PRs created between START_TIME and END_TIME are kept.
# Leave the values empty to disable the filter; when only END_TIME is empty,
# today is used instead.
# WARNING: the GitHub API returns PRs 30 per page, so depending on the chosen
# dates a run can require a lot of requests.
START_TIME='2018-01-01'
END_TIME='2018-02-01'
# Verbosity: 0 prints the summary only (default), 1 also prints progress
# information.
VERBOSE='1'
# INTERNAL PARAMETERS ----------------------------
# Remember whether we run on macOS: the rest of the script uses OS to choose
# between the BSD (`date -j -f`) and the GNU (`date -d`) way of parsing dates.
if [[ "$OSTYPE" == darwin* ]]; then
  OS="DARWIN"
else
  OS="UNIX"
fi
# Check available tools
# curl and bc are required by the rest of the script (both are present on most
# unix platforms). Every tool is probed on its own, so a value of a status
# variable inherited from the environment can no longer produce a false
# "missing tool" error, and the diagnostic goes to stderr so it never mixes
# with the statistics printed on stdout.
for _tool in curl bc; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    echo "ERROR : you need $_tool on this platform" >&2
    exit 1
  fi
done
unset _tool
# Fix date filter
#######################################
# Convert a calendar date to epoch seconds at 00:00:00 UTC.
# Globals:   OS (read)
# Arguments: $1 - date formatted YYYY-MM-DD
# Outputs:   epoch seconds on stdout
# Returns:   status of date(1); non-zero when the value cannot be parsed
#######################################
_ymd_to_epoch() {
  if [ "$OS" = "DARWIN" ]; then
    # BSD date keeps the current time of day for every field the format does
    # not mention, so midnight is spelled out explicitly.
    date -j -u -f '%Y-%m-%d %H:%M:%S' "$1 00:00:00" +%s
  else
    # -u makes the GNU branch use UTC as well, like the BSD branch and like
    # the timestamps returned by the GitHub API.
    date -u -d "$1" +%s
  fi
}
NOW="$(date +%s)"
# FILTER is reset so a value inherited from the environment cannot switch the
# filter on while START_TIME is empty.
FILTER=""
FILTER_TEXT=""
if [ -n "$START_TIME" ]; then
  FILTER="ON"
  FILTER_TEXT=" created between $START_TIME"
  if ! START_TIME="$(_ymd_to_epoch "$START_TIME")"; then
    echo "ERROR : START_TIME is not a valid YYYY-MM-DD date" >&2
    exit 1
  fi
  # we get today by default for END_TIME
  if [ -z "$END_TIME" ]; then
    FILTER_TEXT="$FILTER_TEXT and today"
    END_TIME="$NOW"
  else
    FILTER_TEXT="$FILTER_TEXT and $END_TIME"
    if ! END_TIME="$(_ymd_to_epoch "$END_TIME")"; then
      echo "ERROR : END_TIME is not a valid YYYY-MM-DD date" >&2
      exit 1
    fi
  fi
fi
# FUNCTIONS ---------------------------
# Print the command-line help on stdout.
# Reads $0 to show the name the script was invoked with.
usage() {
  printf '%s\n' \
    "USAGE" \
    "$0 -h : get help" \
    "$0 [merge|open] : compute stats on merged PR or opened PR (open is default)"
}
#######################################
# List how long each merged PR of a repository stayed open.
# Walks the closed-PR pages of the GitHub API, newest PR first, and for every
# PR that has a merge date computes closed_at - created_at in seconds.
# Each page is processed exactly once, so a PR is never counted twice.
# Globals:   VERBOSE, OS, FILTER, START_TIME, END_TIME (read)
# Arguments: $1 - repository written as "owner/name"
# Outputs:   one duration per line on stdout, sorted ascending;
#            progress messages on stderr when VERBOSE is 1
#######################################
github_pr_merge() {
  local repo="$1"
  local api_url="https://api.github.com/repos/$repo/pulls"
  local last_page page json list_tmp k
  local dt_crea dt_closed oldest_created
  local -a created_dates closed_dates durations
  durations=()

  # The page number of the link marked rel="last" in the response headers
  # tells how many pages exist; a single page is assumed when none is found.
  last_page="$(curl -i -sL "${api_url}?sort=created&state=closed" \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\)[^,]*rel="last".*/\1/p')"
  case "$last_page" in
    '' | *[!0-9]*) last_page=1 ;;
  esac

  if [ "$VERBOSE" = "1" ]; then
    echo "" >&2
  fi
  for ((page = 1; page <= last_page; page++)); do
    if [ "$VERBOSE" = "1" ]; then
      # Rewrite the previous progress line in place.
      tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    fi
    json="$(curl -sL "${api_url}?sort=created&direction=desc&state=closed&page=$page")"
    # get PR merged AND closed - we use closed date to compute stat and we
    # ignore a PR if its merged date is null
    list_tmp="$(echo "$json" | grep -A1 -B2 "closed_at" | grep --invert-match null | grep -B4 "merged_at" | grep -A3 "created_at")"
    # Word splitting is intended: the values are ISO timestamps without
    # spaces or glob characters.
    # shellcheck disable=SC2207
    created_dates=($(echo "$list_tmp" | grep "created_at" | tr -d ' ' | cut -d '"' -f 4))
    # shellcheck disable=SC2207
    closed_dates=($(echo "$list_tmp" | grep "closed_at" | tr -d ' ' | cut -d '"' -f 4))

    oldest_created=""
    for ((k = 0; k < ${#created_dates[@]}; k++)); do
      # Skip an entry whose closing date could not be paired with it.
      if [ -z "${closed_dates[k]:-}" ]; then
        continue
      fi
      if [ "$OS" = "DARWIN" ]; then
        dt_crea="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${created_dates[k]}" +%s)"
        dt_closed="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${closed_dates[k]}" +%s)"
      else
        dt_crea="$(date -d "${created_dates[k]}" +%s)"
        dt_closed="$(date -d "${closed_dates[k]}" +%s)"
      fi
      if [ -z "$dt_crea" ] || [ -z "$dt_closed" ]; then
        continue
      fi
      oldest_created="$dt_crea"
      if [ "$FILTER" = "ON" ]; then
        (( dt_crea >= START_TIME && dt_crea <= END_TIME )) || continue
      fi
      durations+=("$(( dt_closed - dt_crea ))")
    done

    # we do not catch anymore page because we get all PR for the period:
    # pages are sorted newest first, so once the oldest PR of this page was
    # created before START_TIME every following page is out of range too.
    if [ "$FILTER" = "ON" ] && [ -n "$oldest_created" ] && (( START_TIME > oldest_created )); then
      if [ "$VERBOSE" = "1" ]; then
        echo "[Stop fetching PR, other PR are out of the selected time]" >&2
      fi
      break
    fi
  done

  if (( ${#durations[@]} > 0 )); then
    printf '%s\n' "${durations[@]}" | sort -n
  fi
}
#######################################
# List how long each open PR of a repository has been open.
# Walks the open-PR pages of the GitHub API, newest PR first, and for every
# PR computes NOW - created_at in seconds.
# Each page is processed exactly once, so a PR is never counted twice.
# Globals:   VERBOSE, OS, FILTER, START_TIME, END_TIME, NOW (read)
# Arguments: $1 - repository written as "owner/name"
# Outputs:   one duration per line on stdout, sorted ascending;
#            progress messages on stderr when VERBOSE is 1
#######################################
github_pr_open() {
  local repo="$1"
  local api_url="https://api.github.com/repos/$repo/pulls"
  # Fall back to the current time when the caller did not set NOW.
  local now="${NOW:-$(date +%s)}"
  local last_page page json created dt_crea oldest_created
  local -a created_dates durations
  durations=()

  # The page number of the link marked rel="last" in the response headers
  # tells how many pages exist; a single page is assumed when none is found.
  last_page="$(curl -i -sL "${api_url}?sort=created&state=open" \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\)[^,]*rel="last".*/\1/p')"
  case "$last_page" in
    '' | *[!0-9]*) last_page=1 ;;
  esac

  if [ "$VERBOSE" = "1" ]; then
    echo "" >&2
  fi
  for ((page = 1; page <= last_page; page++)); do
    if [ "$VERBOSE" = "1" ]; then
      # Rewrite the previous progress line in place.
      tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    fi
    json="$(curl -sL "${api_url}?sort=created&direction=desc&state=open&page=$page")"
    # Keep the created_at line found two lines above each closed_at line.
    # Word splitting is intended: the values are ISO timestamps without
    # spaces or glob characters.
    # shellcheck disable=SC2207
    created_dates=($(echo "$json" | grep -B2 "closed_at" | grep "created_at" | tr -d ' ' | cut -d '"' -f 4))

    oldest_created=""
    for created in "${created_dates[@]}"; do
      if [ "$OS" = "DARWIN" ]; then
        dt_crea="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "$created" +%s)"
      else
        dt_crea="$(date -d "$created" +%s)"
      fi
      if [ -z "$dt_crea" ]; then
        continue
      fi
      oldest_created="$dt_crea"
      if [ "$FILTER" = "ON" ]; then
        (( dt_crea >= START_TIME && dt_crea <= END_TIME )) || continue
      fi
      durations+=("$(( now - dt_crea ))")
    done

    # we do not catch anymore page because we get all PR for the period:
    # pages are sorted newest first, so once the oldest PR of this page was
    # created before START_TIME every following page is out of range too.
    if [ "$FILTER" = "ON" ] && [ -n "$oldest_created" ] && (( START_TIME > oldest_created )); then
      if [ "$VERBOSE" = "1" ]; then
        echo "[Stop fetching PRs, next will be out of the selected time]" >&2
      fi
      break
    fi
  done

  if (( ${#durations[@]} > 0 )); then
    printf '%s\n' "${durations[@]}" | sort -n
  fi
}
#######################################
# Compute the median of a list of numbers sorted in ascending order.
# For an odd number of values the middle one is returned; for an even number
# the two middle values are averaged.
# Arguments: the sorted values, one per argument
# Outputs:   the median with two decimals on stdout (e.g. "2.50"), or an
#            empty line when no value is given
#######################################
median_list() {
  local len_list="$#"
  local lower_idx upper_idx low high sum median
  if (( len_list == 0 )); then
    echo ""
    return
  fi
  # Both indexes designate the same element when the count is odd:
  #   n=3 -> 2,2   n=4 -> 2,3   n=1 -> 1,1
  lower_idx=$(( (len_list + 1) / 2 ))
  upper_idx=$(( len_list / 2 + 1 ))
  low="${!lower_idx}"
  high="${!upper_idx}"
  if [[ "$low" =~ ^[0-9]+$ && "$high" =~ ^[0-9]+$ ]]; then
    # Non-negative integers are averaged with shell arithmetic, which avoids
    # a bc process and always prints the leading zero ("0.50", not ".50").
    sum=$(( 10#$low + 10#$high ))
    printf -v median '%d.%02d' "$(( sum / 2 ))" "$(( sum % 2 * 50 ))"
  else
    # Anything else (negative or decimal values) is delegated to bc.
    median=$(echo "scale=2; ($low + $high)/2" | bc -l)
  fi
  echo "$median"
}
#######################################
# Format a duration given in seconds as "<days>d <hh>h:<mm>m:<ss>s".
# A fractional part is dropped and an empty value counts as 0, so the
# function never aborts on an arithmetic syntax error.
# Arguments: $1 - duration in seconds
# Outputs:   the formatted duration on stdout
#######################################
convert_sec() {
  local secs="${1%%.*}"
  secs="${secs:-0}"
  printf '%dd %02dh:%02dm:%02ds\n' \
    "$(( secs / 86400 ))" \
    "$(( secs % 86400 / 3600 ))" \
    "$(( secs % 3600 / 60 ))" \
    "$(( secs % 60 ))"
}
#######################################
# Print the five-number summary (minimum, lower quartile, median, upper
# quartile, maximum) of a list of durations as a two-line table.
# The quartiles are the medians of the lower and upper halves of the list,
# selected by position; with an odd count the middle element belongs to both
# halves.
# Globals:   VERBOSE, FILTER_TEXT (read)
# Arguments: durations in seconds, sorted ascending, one per argument
# Outputs:   the table on stdout; a one-line recap on stderr when VERBOSE is 1
# Calls:     median_list, convert_sec
#######################################
five_nb_summary() {
  local len_list="$#"
  local sample_minimum="" lower_quartile="" median=""
  local upper_quartile="" sample_maximum=""
  local half upper_start
  if (( len_list > 0 )); then
    # NOTE list is already sorted asc
    sample_minimum="$1"
    sample_maximum="${!len_list}"
    # Size of each half and first position of the upper half:
    #   n=5 -> lower 1..3, upper 3..5     n=4 -> lower 1..2, upper 3..4
    half=$(( (len_list + 1) / 2 ))
    upper_start=$(( len_list - half + 1 ))
    median="$(median_list "$@")"
    lower_quartile="$(median_list "${@:1:$half}")"
    upper_quartile="$(median_list "${@:$upper_start}")"
    # Keep whole seconds only; a value below one second becomes 0.
    median="${median%%.*}"
    median="${median:-0}"
    lower_quartile="${lower_quartile%%.*}"
    lower_quartile="${lower_quartile:-0}"
    upper_quartile="${upper_quartile%%.*}"
    upper_quartile="${upper_quartile:-0}"
  fi
  if [ "$VERBOSE" = "1" ]; then
    echo "[Stats computed on $len_list PR$FILTER_TEXT]" >&2
  fi
  printf "\n"
  # The middle column holds the median, so it is labelled accordingly.
  printf "%16s | %16s | %16s | %16s | %16s" "Min" "Q1" "Median" "Q3" "Max"
  printf "\n"
  if (( len_list > 0 )); then
    printf "%16s | %16s | %16s | %16s | %16s" \
      "$(convert_sec "$sample_minimum")" \
      "$(convert_sec "$lower_quartile")" \
      "$(convert_sec "$median")" \
      "$(convert_sec "$upper_quartile")" \
      "$(convert_sec "$sample_maximum")"
  fi
  printf "\n"
}
# MAIN ----------------------------
# Usage: <script> [-h | merge | open]
if [ "$1" = "-h" ]; then
  usage
  exit
fi
case "$1" in
  merge | open)
    MODE="$1"
    ;;
  "")
    # No argument: keep the MODE configured in the PARAMETERS section.
    ;;
  *)
    # Reject anything else instead of silently computing the default mode.
    echo "ERROR : unknown mode '$1'" >&2
    usage >&2
    exit 1
    ;;
esac
# open is the default, also when MODE was emptied in the configuration.
MODE="${MODE:-open}"
if [ "$VERBOSE" = "1" ]; then
  echo "[Fetching $MODE PRs stats from $GITHUB_REPO]" >&2
fi
# Dispatch to github_pr_merge or github_pr_open.
list_time="$("github_pr_$MODE" "$GITHUB_REPO")"
# Word splitting is intended: every duration becomes one argument.
# shellcheck disable=SC2086
five_nb_summary $list_time