#!/usr/bin/env bash

# Scans a GitHub repository and computes statistics on its pull requests (PRs).
# Requires curl and bc (both present on most Unix-like systems).


# PARAMETERS ----------------------------
# GitHub repository to scan, written as "owner/name"
GITHUB_REPO='status-im/status-mobile'

# Computation mode. Available values:
#   open  : only PRs that are still open (neither merged nor closed); the
#           five-number summary is computed on how long each one has been
#           open until now
#   merge : only PRs that were merged AND closed; the five-number summary is
#           computed on the time between creation and closing
# The value set here is used when no mode is given on the command line.
MODE='open'

# Optional date filter (format: YYYY-MM-DD)
#   Only PRs created between START_TIME and END_TIME are kept.
#   Leave START_TIME empty to disable the filter.
#   An empty END_TIME means "today".
# WARNING: the GitHub pulls API is paginated (30 results per page), so
#          depending on the chosen dates this can take a lot of requests.
START_TIME='2018-01-01'
END_TIME='2018-02-01'

# Verbose mode: print progress information on stderr
# Available values: 0 | 1
VERBOSE='1'


# INTERNAL PARAMETERS ----------------------------
# Detect the platform: OS is "DARWIN" when $OSTYPE starts with "darwin"
# (macOS), and "UNIX" for everything else. The rest of the script uses it to
# pick between BSD and GNU `date` syntax.
if [[ "$OSTYPE" == darwin* ]]; then
  OS="DARWIN"
else
  OS="UNIX"
fi

# Check available tools
# curl and bc are required (both ship with most Unix platforms). Each tool is
# tested on its own, without a shared flag variable, so a value inherited from
# the environment cannot trigger a false error. Diagnostics go to stderr so
# that stdout only ever carries the statistics table.
if ! command -v curl >/dev/null 2>&1; then
  echo "ERROR : you need curl on this platform" >&2
  exit 1
fi
if ! command -v bc >/dev/null 2>&1; then
  echo "ERROR : you need bc on this platform" >&2
  exit 1
fi

# Fix date filter
# Turns the optional YYYY-MM-DD bounds into epoch seconds and builds the
# human-readable FILTER_TEXT shown in verbose mode.
# Sets: NOW, FILTER ("ON" only when START_TIME is non-empty), FILTER_TEXT,
# and overwrites START_TIME / END_TIME with epoch seconds.

# Print the epoch second of 00:00:00 UTC on the given YYYY-MM-DD day.
# UTC is used on every platform because GitHub timestamps are expressed in
# UTC. On BSD/macOS the time of day is passed explicitly: with -f, fields
# missing from the format are otherwise taken from the current clock.
_day_to_epoch() {
  if [ "$OS" = "DARWIN" ]; then
    date -j -u -f '%Y-%m-%d %H:%M:%S' "$1 00:00:00" +%s
  else
    date -u -d "$1" +%s
  fi
}

NOW="$(date +%s)"
FILTER_TEXT=""
if [ -n "$START_TIME" ]; then
  FILTER="ON"
  FILTER_TEXT=" created between $START_TIME"
  if ! START_TIME="$(_day_to_epoch "$START_TIME")"; then
    echo "ERROR : START_TIME must be a valid YYYY-MM-DD date" >&2
    exit 1
  fi

  # we get today by default for END_TIME
  if [ -z "$END_TIME" ]; then
    FILTER_TEXT="$FILTER_TEXT and today"
    END_TIME="$NOW"
  else
    FILTER_TEXT="$FILTER_TEXT and $END_TIME"
    # NOTE: the bound is midnight at the start of END_TIME, so PRs created
    # later during that day are not included.
    if ! END_TIME="$(_day_to_epoch "$END_TIME")"; then
      echo "ERROR : END_TIME must be a valid YYYY-MM-DD date" >&2
      exit 1
    fi
  fi
fi

# FUNCTIONS ---------------------------
# Print the command-line help on stdout.
usage() {
  printf '%s\n' \
    "USAGE" \
    "$0 -h : get help" \
    "$0 [merge|open] : compute stats on merged PR or opened PR (open is default)"
}

#######################################
# Print, one per line and sorted ascending, the number of seconds between
# creation and closing of every merged pull request of a repository.
# Pages are requested newest-first; each page is processed exactly once.
# Globals:
#   OS         (read) "DARWIN" selects BSD date syntax, otherwise GNU date
#   VERBOSE    (read) "1" prints progress on stderr
#   FILTER     (read) "ON" keeps only PRs created in [START_TIME, END_TIME]
#   START_TIME (read) epoch seconds, only used when FILTER is "ON"
#   END_TIME   (read) epoch seconds, only used when FILTER is "ON"
# Arguments:
#   $1 - repository as "owner/name"
# Outputs:
#   durations in seconds on stdout (nothing when no PR matches),
#   progress messages on stderr
#######################################
github_pr_merge() {
  local repo="$1"
  local api="https://api.github.com/repos/$repo/pulls"
  local last_page page json list_tmp k
  local dt_crea dt_closed page_oldest
  local -a created closed spent
  spent=()

  # The "Link" response header advertises the last page number; it is absent
  # when there is a single page. The number is read from the rel="last" URL's
  # "page" parameter wherever it sits in the query string.
  last_page="$(curl -i -sL "$api?sort=created&state=closed" \
    | grep -i '^link:' \
    | tr ',' '\n' \
    | grep 'rel="last"' \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\).*/\1/p' \
    | head -n 1)"
  [ -n "$last_page" ] || last_page=1

  if [ "$VERBOSE" = "1" ]; then
    echo "" >&2
  fi
  for ((page = 1; page <= last_page; page++)); do
    if [ "$VERBOSE" = "1" ]; then
      tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    fi
    json="$(curl -sL "$api?sort=created&direction=desc&state=closed&page=$page")"

    # get PR merged AND closed - we use closed date to compute stat and we
    # ignore the PR if its merged date is null
    list_tmp="$(printf '%s\n' "$json" \
      | grep -A1 -B2 "closed_at" \
      | grep -v null \
      | grep -B4 "merged_at" \
      | grep -A3 "created_at")"

    # Only the dates of THIS page are kept: accumulating them across pages
    # would count the PRs of earlier pages again on every new page.
    # Word splitting is intended here (ISO timestamps hold no blank or glob).
    # shellcheck disable=SC2207
    created=( $(printf '%s\n' "$list_tmp" | grep "created_at" | tr -d ' ' | cut -d '"' -f 4) )
    # shellcheck disable=SC2207
    closed=( $(printf '%s\n' "$list_tmp" | grep "closed_at" | tr -d ' ' | cut -d '"' -f 4) )

    page_oldest=""
    for ((k = 0; k < ${#created[@]}; k++)); do
      # skip entries whose closing date could not be paired
      [ -n "${closed[k]}" ] || continue

      # skip entries whose dates cannot be parsed instead of computing on
      # an empty value
      if [ "$OS" = "DARWIN" ]; then
        dt_crea="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${created[k]}" +%s)" || continue
        dt_closed="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${closed[k]}" +%s)" || continue
      else
        dt_crea="$(date -d "${created[k]}" +%s)" || continue
        dt_closed="$(date -d "${closed[k]}" +%s)" || continue
      fi
      # results are sorted newest-first, so the last one seen is the oldest
      page_oldest="$dt_crea"

      if [ "$FILTER" = "ON" ]; then
        (( dt_crea >= START_TIME && dt_crea <= END_TIME )) || continue
      fi
      spent+=( $(( dt_closed - dt_crea )) )
    done

    # we do not catch anymore page because we get all PR for the period
    if [ "$FILTER" = "ON" ] && [ -n "$page_oldest" ] && (( page_oldest < START_TIME )); then
      if [ "$VERBOSE" = "1" ]; then
        echo "[Stop fetching PR, other PR are out of the selected time]" >&2
      fi
      break
    fi
  done

  if [ "${#spent[@]}" -gt 0 ]; then
    printf '%s\n' "${spent[@]}" | sort -n
  fi
}

#######################################
# Print, one per line and sorted ascending, the number of seconds every
# open pull request of a repository has been open (NOW - created_at).
# Pages are requested newest-first; each page is processed exactly once.
# Globals:
#   NOW        (read) current time in epoch seconds
#   OS         (read) "DARWIN" selects BSD date syntax, otherwise GNU date
#   VERBOSE    (read) "1" prints progress on stderr
#   FILTER     (read) "ON" keeps only PRs created in [START_TIME, END_TIME]
#   START_TIME (read) epoch seconds, only used when FILTER is "ON"
#   END_TIME   (read) epoch seconds, only used when FILTER is "ON"
# Arguments:
#   $1 - repository as "owner/name"
# Outputs:
#   durations in seconds on stdout (nothing when no PR matches),
#   progress messages on stderr
#######################################
github_pr_open() {
  local repo="$1"
  local api="https://api.github.com/repos/$repo/pulls"
  local last_page page json k dt_crea page_oldest
  local -a created spent
  spent=()

  # The "Link" response header advertises the last page number; it is absent
  # when there is a single page. The number is read from the rel="last" URL's
  # "page" parameter wherever it sits in the query string.
  last_page="$(curl -i -sL "$api?sort=created&state=open" \
    | grep -i '^link:' \
    | tr ',' '\n' \
    | grep 'rel="last"' \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\).*/\1/p' \
    | head -n 1)"
  [ -n "$last_page" ] || last_page=1

  if [ "$VERBOSE" = "1" ]; then
    echo "" >&2
  fi
  for ((page = 1; page <= last_page; page++)); do
    if [ "$VERBOSE" = "1" ]; then
      tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    fi
    json="$(curl -sL "$api?sort=created&direction=desc&state=open&page=$page")"

    # Only the dates of THIS page are kept: accumulating them across pages
    # would count the PRs of earlier pages again on every new page.
    # Word splitting is intended here (ISO timestamps hold no blank or glob).
    # shellcheck disable=SC2207
    created=( $(printf '%s\n' "$json" | grep -B2 "closed_at" | grep "created_at" | tr -d ' ' | cut -d '"' -f 4) )

    page_oldest=""
    for ((k = 0; k < ${#created[@]}; k++)); do
      # skip entries whose date cannot be parsed instead of computing on
      # an empty value
      if [ "$OS" = "DARWIN" ]; then
        dt_crea="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${created[k]}" +%s)" || continue
      else
        dt_crea="$(date -d "${created[k]}" +%s)" || continue
      fi
      # results are sorted newest-first, so the last one seen is the oldest
      page_oldest="$dt_crea"

      if [ "$FILTER" = "ON" ]; then
        (( dt_crea >= START_TIME && dt_crea <= END_TIME )) || continue
      fi
      spent+=( $(( NOW - dt_crea )) )
    done

    # we do not catch anymore page because we get all PR for the period
    if [ "$FILTER" = "ON" ] && [ -n "$page_oldest" ] && (( page_oldest < START_TIME )); then
      if [ "$VERBOSE" = "1" ]; then
        echo "[Stop fetching PRs, next will be out of the selected time]" >&2
      fi
      break
    fi
  done

  if [ "${#spent[@]}" -gt 0 ]; then
    printf '%s\n' "${spent[@]}" | sort -n
  fi
}


#######################################
# Print the median of a list of integers that is already sorted ascending.
# An odd-sized list yields its middle element; an even-sized list yields
# the average of its two middle elements.
# Arguments:
#   $@ - sorted integers, one per argument
# Outputs:
#   the median with exactly two decimals (e.g. "5.00", "2.50") on stdout,
#   or an empty line when no argument is given
#######################################
median_list() {
  local count="$#"
  local low high sum sign="" frac="00"

  if [ "$count" -eq 0 ]; then
    echo ""
    return 0
  fi

  if (( count % 2 )); then
    # odd size: a single middle element
    low=$(( (count + 1) / 2 ))
    high="$low"
  else
    # even size: the two elements around the middle
    low=$(( count / 2 ))
    high=$(( low + 1 ))
  fi

  # ${!low} is the positional parameter whose index is stored in "low"
  sum=$(( ${!low} + ${!high} ))

  # Halve by hand so a fraction below one keeps its leading zero ("0.50"),
  # which lets callers take the integer part with a plain text cut.
  if (( sum < 0 )); then
    sign="-"
    sum=$(( -sum ))
  fi
  if (( sum % 2 )); then
    frac="50"
  fi
  printf '%s%d.%s\n' "$sign" "$(( sum / 2 ))" "$frac"
}

#######################################
# Format a duration given in seconds as "<d>d <hh>h:<mm>m:<ss>s".
# Arguments:
#   $1 - duration in seconds; a fractional part is dropped and an empty
#        value (or one with no integer part, such as ".50") counts as 0
# Outputs:
#   the formatted duration on stdout
#######################################
convert_sec() {
  # keep the integer part only: shell arithmetic cannot handle decimals
  local secs="${1%%.*}"
  case "$secs" in
    '' | -) secs=0 ;;
  esac
  printf '%dd %02dh:%02dm:%02ds\n' \
    "$(( secs / 86400 ))" \
    "$(( secs % 86400 / 3600 ))" \
    "$(( secs % 3600 / 60 ))" \
    "$(( secs % 60 ))"
}

# Print the integer part of a decimal number; an empty integer part (as in
# ".50" or "-.50") is reported as 0.
_five_nb_int_part() {
  local value="${1%%.*}"
  case "$value" in
    '' | -) value=0 ;;
  esac
  printf '%s\n' "$value"
}

#######################################
# Print the five-number summary (minimum, lower quartile, median, upper
# quartile, maximum) of a list of durations as a two-line table.
# The lower quartile is the median of the values <= the median, the upper
# quartile is the median of the values >= the median.
# Globals:
#   VERBOSE     (read) "1" prints the sample size on stderr
#   FILTER_TEXT (read) description of the date filter, appended to that line
# Arguments:
#   $@ - durations in seconds, already sorted ascending, one per argument
# Outputs:
#   the table on stdout (header only when the list is empty)
#######################################
five_nb_summary() {
  local len_list="$#"
  local sample_minimum="" lower_quartile="" median=""
  local upper_quartile="" sample_maximum=""
  # initialised on every call so nothing leaks in from a previous one
  local q1_list="" q3_list="" l

  if [ "$len_list" -gt 0 ]; then
    # NOTE list is already sorted asc
    sample_minimum="$1"
    sample_maximum="${!len_list}"

    median="$(median_list "$@")"
    for l in "$@"; do
      # Q1
      if [ "1" = "$(echo "$l <= $median" | bc -l)" ]; then
        q1_list="$q1_list $l"
      fi
      # Q3
      if [ "1" = "$(echo "$l >= $median" | bc -l)" ]; then
        q3_list="$q3_list $l"
      fi
    done

    # q1_list / q3_list are left unquoted on purpose: one argument per value
    lower_quartile="$(_five_nb_int_part "$(median_list $q1_list)")"
    upper_quartile="$(_five_nb_int_part "$(median_list $q3_list)")"
    median="$(_five_nb_int_part "$median")"
  fi

  if [ "$VERBOSE" = "1" ]; then
    echo "[Stats computed on $len_list PR$FILTER_TEXT]" >&2
  fi
  printf "\n"
  # the middle column holds the median, so it is labelled as such
  printf "%16s | %16s | %16s | %16s | %16s" "Min" "Q1" "Median" "Q3" "Max"
  printf "\n"
  if [ "$len_list" -gt 0 ]; then
    printf "%16s | %16s | %16s | %16s | %16s" \
      "$(convert_sec "$sample_minimum")" \
      "$(convert_sec "$lower_quartile")" \
      "$(convert_sec "$median")" \
      "$(convert_sec "$upper_quartile")" \
      "$(convert_sec "$sample_maximum")"
  fi
  printf "\n"
}


# MAIN ----------------------------

# Command line: an optional first argument selects the help or the mode.
# Anything else is rejected so that a typo cannot silently run the default
# mode and produce statistics for the wrong set of PRs.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
  merge | open)
    MODE="$1"
    ;;
  "")
    # no argument: keep the configured MODE
    ;;
  *)
    echo "ERROR : unknown argument '$1'" >&2
    usage >&2
    exit 2
    ;;
esac

# MODE is used to build the name of the function to call, so make sure it
# is one of the two supported values before dispatching.
case "$MODE" in
  merge | open) ;;
  *)
    echo "ERROR : MODE must be 'merge' or 'open' (got '$MODE')" >&2
    exit 2
    ;;
esac

if [ "$VERBOSE" = "1" ]; then
  echo "[Fetching $MODE PRs stats from $GITHUB_REPO]" >&2
fi

list_time="$("github_pr_$MODE" "$GITHUB_REPO")"
# list_time is left unquoted on purpose: every duration becomes one argument
# shellcheck disable=SC2086
five_nb_summary $list_time