375 lines
22 KiB
Plaintext
375 lines
22 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Exploring the commit graph\n",
|
|
"\n",
|
|
"(2018-02-08)\n",
|
|
"\n",
|
"This script crawls a repository's commit graph by inspecting each file\n",
"currently in the repository and looking at all commits that have changed\n",
"that file (tracking across renames). The data is lightly processed and is\n",
|
|
"serialized to JSON to be displayed by a frontend."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import collections\n",
|
|
"import datetime\n",
|
|
"import json\n",
|
|
"import math\n",
|
|
"import os\n",
|
|
"import subprocess\n",
|
|
"import tempfile\n",
|
|
"\n",
|
|
"import git\n",
|
|
"\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"class RepoContext:\n",
"    \"\"\"Read file/commit relationships out of a single git repository.\n",
"\n",
"    File listings and per-file history come from git(1) subprocesses;\n",
"    commit hashes are resolved to commit objects with gitpython. The\n",
"    per-file history is cached as JSON in the system temporary directory,\n",
"    keyed by the repository path and validated against the HEAD commit.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, repo_root):\n",
"        \"\"\"Store `repo_root`, the path handed to `git -C` and `git.Repo`.\"\"\"\n",
"        self._repo_root = repo_root\n",
"\n",
"    def _tempfile_name(self):\n",
"        \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n",
"        # Encode the path to the repo in a format that can be used in paths.\n",
"        sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n",
"        filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n",
"        return os.path.join(tempfile.gettempdir(), filename)\n",
"\n",
"    def _git_cmd(self, args):\n",
"        \"\"\"Run git(1) with the specified arguments; return its stdout.\n",
"\n",
"        The output is returned as `bytes`. Raises\n",
"        `subprocess.CalledProcessError` if git exits with nonzero status.\n",
"        \"\"\"\n",
"        return subprocess.check_output(['git', '-C', self._repo_root] + args)\n",
"\n",
"    def hashes_for_file(self, current_filename):\n",
"        \"\"\"List the hashes of all commits that touched `current_filename`.\n",
"\n",
"        History is followed across renames (`git log --follow`). Hashes are\n",
"        returned as `str`s, in the order in which `git log` prints them.\n",
"        \"\"\"\n",
"        assert current_filename, current_filename\n",
"        # Obviously really inefficient to spawn a subprocess for each file\n",
"        # in the repo, but the `git` module doesn't seem to have an easy way\n",
"        # to `--follow`... if productionized, this could just use libgit2\n",
"        # bindings for performance.\n",
"        #\n",
"        # The `--` separator makes git treat the filename strictly as a\n",
"        # path, even if it starts with `-` or happens to match a ref name.\n",
"        hashes = self._git_cmd(\n",
"            ['log', '--pretty=%H', '--follow', '--', current_filename]\n",
"        ).splitlines()\n",
"        return [h.decode('ascii') for h in hashes]\n",
"\n",
"    def all_files(self):\n",
"        \"\"\"Get a list of all paths to files in the git repo.\n",
"\n",
"        Assumes that filenames are UTF-8--encoded, which seems reasonable.\n",
"        Alternately, could return a list of `bytes` objects.\n",
"        \"\"\"\n",
"        file_bytestring_names = self._git_cmd([\n",
"            'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n",
"        ]).split(b'\\0')[:-1]  # strip trailing '\\0'\n",
"        return [fn.decode('utf-8') for fn in file_bytestring_names]\n",
"\n",
"    def _load_cached_files(self, head):\n",
"        \"\"\"Return the cached file-to-hashes mapping, or `None` if unusable.\n",
"\n",
"        The cache is unusable if it cannot be read, is not valid JSON, is\n",
"        malformed, or was written for a different repository path or a\n",
"        different HEAD commit than `head`.\n",
"        \"\"\"\n",
"        try:\n",
"            with open(self._tempfile_name()) as infile:\n",
"                result = json.load(infile)\n",
"        except (OSError, ValueError):\n",
"            # `json.JSONDecodeError` is a subclass of `ValueError`.\n",
"            return None\n",
"        if not isinstance(result, dict):\n",
"            return None\n",
"        if result.get('base') != self._repo_root:\n",
"            return None\n",
"        if result.get('head') != head:\n",
"            return None\n",
"        files = result.get('files')\n",
"        return files if isinstance(files, dict) else None\n",
"\n",
"    def _build_and_cache_files(self, all_files, head):\n",
"        \"\"\"Compute the file-to-hashes mapping and write it to the cache.\"\"\"\n",
"        files = {fn: self.hashes_for_file(fn) for fn in all_files}\n",
"        print('Compiled file database')\n",
"        cache = {\n",
"            'base': self._repo_root,\n",
"            'head': head,\n",
"            'files': files,\n",
"        }\n",
"        print('Created cache for file database')\n",
"        with open(self._tempfile_name(), 'w') as outfile:\n",
"            json.dump(cache, outfile)\n",
"        print('Dumped cache for file database')\n",
"        return files\n",
"\n",
"    def load_commit_data(self):\n",
"        \"\"\"Load data about the relationships between commits and files.\n",
"\n",
"        A commit is _file-reachable_ if it touches a file that currently\n",
"        exists in the repository, even if that file might have been renamed\n",
"        (but not if it has been deleted).\n",
"\n",
"        The per-file history is read from the on-disk cache when the cache\n",
"        matches this repository and its current HEAD; otherwise it is\n",
"        recomputed and the cache is rewritten.\n",
"\n",
"        Returns a dictionary `r` such that\n",
"          - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n",
"            path to a file currently tracked in the repository, then `d[f]`\n",
"            is the list of commit SHAs for all commits that have touched `f`\n",
"            (tracking across renames of `f`);\n",
"          - `r['files']` is a list of all files currently in the repository\n",
"            (and is the keyset of `r['file_to_commits']`);\n",
"          - `r['hashes']` is a frozenset of hashes of all file-reachable\n",
"            commits (and is the union of the values of\n",
"            `r['file_to_commits']`);\n",
"          - `r['commits']` is a dictionary `d` such that if `h` is the hash\n",
"            of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n",
"            descriptor for the commit with hash `h`; and\n",
"          - `r['repo']` is a gitpython `Repo` object for the repository.\n",
"        \"\"\"\n",
"        print('Starting: %s' % self._repo_root)\n",
"        all_files = self.all_files()\n",
"        print('Got %s files' % len(all_files))\n",
"\n",
"        repo = git.Repo(self._repo_root)\n",
"        head = repo.commit().hexsha\n",
"\n",
"        # `files[x] = cs` where `cs` is the list of hashes that touched the\n",
"        # file now known as `x`\n",
"        print('Loading file database')\n",
"        files = self._load_cached_files(head)\n",
"        if files is not None:\n",
"            print('Loaded file database')\n",
"        else:\n",
"            # The cache is missing, corrupt, or stale (for instance, HEAD\n",
"            # has moved since it was written): rebuild it.\n",
"            print('Compiling file database')\n",
"            files = self._build_and_cache_files(all_files, head)\n",
"\n",
"        all_hashes = frozenset().union(*files.values())\n",
"        print('Got %s hashes' % len(all_hashes))\n",
"\n",
"        # `commits[h] == c` s.t. `c.hexsha == h`\n",
"        commits = {h: repo.commit(h) for h in all_hashes}\n",
"        print('Got %s commits' % len(commits))\n",
"\n",
"        return {\n",
"            'file_to_commits': files,\n",
"            'hashes': all_hashes,\n",
"            'files': all_files,\n",
"            'commits': commits,\n",
"            'repo': repo,\n",
"        }"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Starting: /home/dandelion/git/tensorboard\n",
|
|
"Got 698 files\n",
|
|
"Loading file database\n",
|
|
"Loaded file database\n",
|
|
"Got 1323 hashes\n",
|
|
"Got 1323 commits\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Path of the repository to analyze. Set `COMMIT_GRAPH_REPO` in the\n",
"# environment to analyze a different checkout without editing this cell.\n",
"COMMIT_GRAPH_REPO = os.environ.get(\n",
"    'COMMIT_GRAPH_REPO', '/home/dandelion/git/tensorboard')\n",
"data = RepoContext(COMMIT_GRAPH_REPO).load_commit_data()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Which commits are important?\n",
"def _commit_weight(commit):\n",
"    \"\"\"Weight a commit by `log1p` of the `lines` entry of its total stats.\"\"\"\n",
"    line_count = commit.stats.total['lines']\n",
"    return math.log1p(line_count)\n",
"\n",
"# Maps each commit hash in `data['commits']` to that commit's weight.\n",
"commit_weights = {\n",
"    sha: _commit_weight(commit) for sha, commit in data['commits'].items()\n",
"}\n",
"weight_values = sorted(commit_weights.values())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(array([ 1., 31., 0., 122., 59., 84., 79., 82., 85., 78., 71.,\n",
|
|
" 84., 63., 61., 118., 71., 71., 55., 39., 16., 11., 6.,\n",
|
|
" 13., 5., 9., 1., 2., 1., 2., 3.]),\n",
|
|
" array([ 0. , 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n",
|
|
" 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n",
|
|
" 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n",
|
|
" 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n",
|
|
" 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n",
|
|
" 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n",
|
|
" 10.53582259]),\n",
|
|
" <a list of 30 Patch objects>)"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADqpJREFUeJzt3X+snmV9x/H3Z1RUMAZYDwRbsoNJp7IlBnNCUBJDqMlQCOUPSSDOdYykWcIUnYsU9wf/zKRmxh/LFpYG0JoRkFUWiDAn6yBkf9Dt8CMKFEIDrByp9BgHOjXDzu/+OHeTQzntOX3u5+E5z9X3K2me+76e637u7522n3P1eu77aqoKSVK7fmvcBUiSRsugl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDVuzbgLAFi7dm1NT0+PuwxJmiiPPPLIT6pqarl+qyLop6enmZ2dHXcZkjRRkvzXSvo5dSNJjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3LJBn+TWJAeSPLGo7a+TPJ3kB0n+Kckpi967IcneJM8k+YNRFS5JWpmVPBn7TeBvgW8tarsfuKGqDib5EnADcH2Sc4Argd8D3gX8a5Lfrar/G27Zq8P01ntX1O+FbZeMuBJJOrJlR/RV9RDw08Pavl9VB7vdh4H13fYm4I6q+t+qeh7YC5w3xHolScdoGHP0fwL8c7e9Dnhx0XtzXZskaUx6BX2SvwQOArcdalqiWx3h2C1JZpPMzs/P9ylDknQUAwd9ks3ApcAnqupQmM8BZy3qth54aanjq2p7Vc1U1czU1LKrbEqSBjRQ0Ce5GLgeuKyqfrnorXuAK5O8NcnZwAbgP/qXKUka1LJ33SS5HbgQWJtkDriRhbts3grcnwTg4ar606p6MsmdwFMsTOlc2+odN5I0KZYN+qq6aonmW47S/4vAF/sUJY3aSm+NBW+P1eTzyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJatyyQZ/k1iQHkjyxqO20JPcnebZ7PbVrT5K/SbI3yQ+SfGCUxUuSlreSEf03gYsPa9sK7KqqDcCubh/go8CG7tcW4KbhlClJGtSyQV9VDwE/Pax5E7Cj294BXL6o/Vu14GHglCRnDqtYSdKxG3SO/oyq2g/QvZ7eta8DXlzUb65rkySNybC/jM0SbbVkx2RLktkks/Pz80MuQ5J0yKBB//KhKZnu9UDXPgectajfeuClpT6gqrZX1UxVzUxNTQ1YhiRpOYMG/T3A5m57M3D3ovY/6u6+OR949dAUjyRpPNYs1yHJ7cCFwNokc8CNwDbgziTXAPuAK7ru9wEfA/YCvwSuHkHNkqRjsGzQV9VVR3hr4xJ9C7i2b1GSpOHxyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxq0ZdwFaHaa33ruifi9su2TElUgaNoN+FTkew/Z4vGbpzebUjSQ1zqCXpMb1Cvokn03yZJInktye5G1Jzk6yO8mzSb6d5MRhFStJOnYDz9EnWQd8Gjinqn6V5E7gSuBjwFer6o4kfw9cA9w0lGp1TFY6/y2pbX2nbtYAb0+yBjgJ2A9cBOzs3t8BXN7zHJKkHgYO+qr6EfBlYB8LAf8q8AjwSlUd7LrNAev6FilJGtzAQZ/kVGATcDbwLuBk4KNLdK0jHL8lyWyS2fn5+UHLkCQto8/UzUeA56tqvqp+DdwFfAg4pZvKAVgPvLTUwVW1vapmqm
pmamqqRxmSpKPp88DUPuD8JCcBvwI2ArPAA8DHgTuAzcDdfYvU6/klq6RjMXDQV9XuJDuBR4GDwGPAduBe4I4kf9W13TKMQrU6+CSrNHl6LYFQVTcCNx7W/BxwXp/PlSQNj0/GSlLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWpcryUQpCNpaeG1YV+L6wDpzeaIXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXGuXqmJsNIVJF0ZUnqjXiP6JKck2Znk6SR7knwwyWlJ7k/ybPd66rCKlSQdu75TN18HvldV7wXeD+wBtgK7qmoDsKvblySNycBTN0neCXwY+GOAqnoNeC3JJuDCrtsO4EHg+j5FSivV0n94Ig1LnxH9u4F54BtJHktyc5KTgTOqaj9A93r6EOqUJA2oT9CvAT4A3FRV5wK/4BimaZJsSTKbZHZ+fr5HGZKko+kT9HPAXFXt7vZ3shD8Lyc5E6B7PbDUwVW1vapmqmpmamqqRxmSpKMZeI6+qn6c5MUk76mqZ4CNwFPdr83Atu717qFUOsGcN5Y0Tn3vo/8UcFuSE4HngKtZ+FfCnUmuAfYBV/Q8hySph15BX1WPAzNLvLWxz+dKkobHJRAkqXEGvSQ1zqCXpMa5qJn0JnOBNr3ZHNFLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa530Cc5IcljSb7b7Z+dZHeSZ5N8O8mJ/cuUJA1qGCP664A9i/a/BHy1qjYA/w1cM4RzSJIG1Cvok6wHLgFu7vYDXATs7LrsAC7vcw5JUj99R/RfAz4P/Kbb/23glao62O3PAet6nkOS1MPAQZ/kUuBAVT2yuHmJrnWE47ckmU0yOz8/P2gZkqRl9BnRXwBcluQF4A4Wpmy+BpySZE3XZz3w0lIHV9X2qpqpqpmpqakeZUiSjmbN8l2WVlU3ADcAJLkQ+Iuq+kSSfwQ+zkL4bwbuHkKd0nFneuu9K+r3wrZLRlyJJt0o7qO/HvjzJHtZmLO/ZQTnkCSt0MAj+sWq6kHgwW77OeC8YXyuJKk/n4yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1LihLIEwKVwkStLxyBG9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5KwkDyTZk+TJJNd17acluT/Js93rqcMrV5J0rPqM6A8Cn6uq9wHnA9cmOQfYCuyqqg3Arm5fkjQmAwd9Ve2vqke77Z8De4B1wCZgR9dtB3B53yIlSYMbyhx9kmngXGA3cEZV7YeFHwbA6cM4hyRpML2DPsk7gO8An6mqnx3DcVuSzCaZnZ+f71uGJOkIegV9krewEPK3VdVdXfPLSc7s3j8TOLDUsVW1vapmqmpmamqqTxmSpKPoc9dNgFuAPVX1lUVv3QNs7rY3A3cPXp4kqa81PY69APgk8MMkj3dtXwC2AXcmuQbYB1zRr0RJUh8DB31V/TuQI7y9cdDPlSQNl0/GSlLjDHpJalyfOXpJq8D01ntX1O+FbZeMuBKtVo7oJalxBr0kNc6gl6TGGfSS1Di/jJU0ML8IngyO6CWpcQa9JDXOqRvpOOE0y/HLEb0kNc6gl6TGGfSS1Djn6CWtGn6PMBqO6CWpcQa9JDXOoJekxhn0ktQ4v4yV9Dor/UJUk8OglzRy/vAYL6duJKlxBr0kNc6gl6TGOUcvSSOwmp7yHVnQJ7kY+DpwAnBzVW0b1bkkaSnDDttJ/VJ5JFM3SU4A/g74KHAOcFWSc0ZxLknS0Y1qRH
8esLeqngNIcgewCXhq2Cea1J+wkgY37L/3refIqL6MXQe8uGh/rmuTJL3JRjWizxJt9boOyRZgS7f7P0meGfBca4GfDHjskvKlYX7a0Az9Olcpr7Mtx8N19rrGnnnzOyvpNKqgnwPOWrS/HnhpcYeq2g5s73uiJLNVNdP3c1Y7r7MtXmc7JuEaRzV185/AhiRnJzkRuBK4Z0TnkiQdxUhG9FV1MMmfAf/Cwu2Vt1bVk6M4lyTp6EZ2H31V3QfcN6rPX6T39M+E8Drb4nW2Y9VfY6pq+V6SpInlWjeS1LiJDvokFyd5JsneJFvHXc8oJDkryQNJ9iR5Msl1465pVJKckOSxJN8ddy2jkuSUJDuTPN39nn5w3DWNQpLPdn9en0hye5K3jbumYUhya5IDSZ5Y1HZakvuTPNu9njrOGpcysUF/HC2zcBD4XFW9DzgfuLbR6wS4Dtgz7iJG7OvA96rqvcD7afB6k6wDPg3MVNXvs3BDxpXjrWpovglcfFjbVmBXVW0AdnX7q8rEBj2LllmoqteAQ8ssNKWq9lfVo932z1kIhuaeMk6yHrgEuHnctYxKkncCHwZuAaiq16rqlfFWNTJrgLcnWQOcxGHP0UyqqnoI+OlhzZuAHd32DuDyN7WoFZjkoD/ulllIMg2cC+webyUj8TXg88Bvxl3ICL0bmAe+0U1R3Zzk5HEXNWxV9SPgy8A+YD/walV9f7xVjdQZVbUfFgZmwOljrucNJjnol11moSVJ3gF8B/hMVf1s3PUMU5JLgQNV9ci4axmxNcAHgJuq6lzgF6zCf+b31c1RbwLOBt4FnJzkD8db1fFtkoN+2WUWWpHkLSyE/G1Vdde46xmBC4DLkrzAwhTcRUn+YbwljcQcMFdVh/5FtpOF4G/NR4Dnq2q+qn4N3AV8aMw1jdLLSc4E6F4PjLmeN5jkoD8ulllIEhbmdPdU1VfGXc8oVNUNVbW+qqZZ+H38t6pqbgRYVT8GXkzynq5pIyNYunsV2Aecn+Sk7s/vRhr80nmRe4DN3fZm4O4x1rKkif2vBI+jZRYuAD4J/DDJ413bF7onjzV5PgXc1g1OngOuHnM9Q1dVu5PsBB5l4a6xx5iAp0dXIsntwIXA2iRzwI3ANuDOJNew8EPuivFVuDSfjJWkxk3y1I0kaQUMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGvf/9NqA3UwXndMAAAAASUVORK5CYII=\n",
|
|
"text/plain": [
|
|
"<matplotlib.figure.Figure at 0x7fb4b53296a0>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"# Sanity check: histogram of the per-commit weights.\n",
"plt.hist(list(commit_weights.values()), bins=30)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Which users are important?\n",
"# Alternate author addresses, mapped to the address to credit instead.\n",
"email_resolutions = {\n",
"    'danmane@gmail.com': 'dandelion@google.com',\n",
"}\n",
"# A user's weight is the sum of the weights of the commits they authored.\n",
"user_weights = collections.defaultdict(float)\n",
"for h, commit in data['commits'].items():\n",
"    nominal_email = commit.author.email\n",
"    email = email_resolutions.get(nominal_email, nominal_email)\n",
"    user_weights[email] += commit_weights[h]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(array([104., 5., 2., 1., 1., 1., 1., 1., 2., 2.]),\n",
|
|
" array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n",
|
|
" 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n",
|
|
" 456.12276041, 513.05146206, 569.98016372]),\n",
|
|
" <a list of 10 Patch objects>)"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADeZJREFUeJzt3WuMXHd5x/HvrzHh2ja3TeTGUTeoVguqykWrNDRVRRNahYtIXgQJhIqFLPkNbUNBAqeVivoukSoSkCpUi1BcCXFpSpUooNLIBFV9UdM1CSTBpDZpGty48SKS0IvUkvL0xRyjlVl713N2M56H70canTn/+c+c55HHvz3+75lxqgpJUl8/NesCJElby6CXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqbtusCwC45JJLanFxcdZlSNJcOXTo0HeramG9eedE0C8uLrK8vDzrMiRpriT5143Mc+lGkpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpo7Jz4ZO8bi3i/M7NiP3/rmmR1bkjbKM3pJas6gl6TmDHpJam7doE/yiSQnkjy8auyiJPclOTJsLxzGk+SjSY4m+UaS125l8ZKk9W3kjP6TwPWnjO0FDlTVTuDAsA/wRmDncNsDfGxzypQkTWvdoK+qvwe+d8rwDcD+4f5+4MZV439ZE/8IXJBk+2YVK0k6e9Ou0V9WVccBhu2lw/jlwHdWzTs2jEmSZmSzfxmbNcZqzYnJniTLSZZXVlY2uQxJ0knTBv1TJ5dkhu2JYfwYcMWqeTuAJ9d6garaV1VLVbW0sLDuf3koSZrStEF/D7BruL8LuHvV+LuGq2+uBp49ucQjSZqNdb8CIcmngdcDlyQ5BnwIuBX4XJLdwBPA24bpXwTeBBwF/ht49xbULEk6C+sGfVW94zQPXbfG3ALeM7YoSdLm8ZOxktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktTcqKBP8gdJHknycJJPJ3lRkiuTHExyJMlnk5y/WcVKks7e1EGf5HLg94Glqvpl4Dzg7cBtwO1VtRN4Gti9GYVKkqYzdulmG/DiJNuAlwDHgWuBu4bH9wM3jjyGJGmEqYO+qv4N+FPgCSYB/yxwCHimqp4bph0DLh9bpCRpemOWbi4EbgCuBH4OeCnwxjWm1mmevyfJcpLllZWVacuQJK1jzNLNG4B/qaqVqvoB8Hng14ALhqUcgB3Ak2s9uar2VdVSVS0tLCyMKEOSdCZjgv4J4OokL0kS4Drgm8D9wE3DnF3A3eNKlCSNMWaN/iCTX7p+DXhoeK19wAeB9yU5ClwM3LkJdUqSprRt/SmnV1UfAj50yvBjwFVjXleStHn8ZKwkNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNTcq6JNckOSuJN9KcjjJ65JclOS+JEeG7YWbVawk6eyNPaP/CPC3VfVLwKuAw8Be4EBV7QQODPuSpBmZOuiT/AzwG8CdAFX1v1X1DHADsH+Yth+4cWyRkqTpjTmjfzmwAvxFkgeSfDzJS4HLquo4wLC9dK0nJ9mTZDnJ8srKyogyJElnMibotwGvBT5WVa8B/ouzWKapqn1VtVRVSwsLCyPKkCSdyZigPwYcq6qDw/5dTIL/qSTbAYbtiXElSp
LGmDroq+rfge8k+cVh6Drgm8A9wK5hbBdw96gKJUmjbBv5/N8DPpXkfOAx4N1Mfnh8Lslu4AngbSOPIUkaYVTQV9WDwNIaD1035nUlSZvHT8ZKUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Nzrok5yX5IEk9w77VyY5mORIks8mOX98mZKkaW3GGf3NwOFV+7cBt1fVTuBpYPcmHEOSNKVRQZ9kB/Bm4OPDfoBrgbuGKfuBG8ccQ5I0ztgz+juADwA/HPYvBp6pqueG/WPA5SOPIUkaYeqgT/IW4ERVHVo9vMbUOs3z9yRZTrK8srIybRmSpHWMOaO/BnhrkseBzzBZsrkDuCDJtmHODuDJtZ5cVfuqaqmqlhYWFkaUIUk6k6mDvqpuqaodVbUIvB34clW9E7gfuGmYtgu4e3SVkqSpbcV19B8E3pfkKJM1+zu34BiSpA3atv6U9VXVV4CvDPcfA67ajNeVJI3nJ2MlqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqbmpgz7JFUnuT3I4ySNJbh7GL0pyX5Ijw/bCzStXknS2xpzRPwe8v6peAVwNvCfJK4G9wIGq2gkcGPYlSTMyddBX1fGq+tpw/z+Aw8DlwA3A/mHafuDGsUVKkqa3KWv0SRaB1wAHgcuq6jhMfhgAl27GMSRJ0xkd9EleBvw18N6q+v5ZPG9PkuUkyysrK2PLkCSdxqigT/ICJiH/qar6/DD8VJLtw+PbgRNrPbeq9lXVUlUtLSwsjClDknQGY666CXAncLiqPrzqoXuAXcP9XcDd05cnSRpr24jnXgP8DvBQkgeHsT8EbgU+l2Q38ATwtnElSpLGmDroq+ofgJzm4eumfV1J0ubyk7GS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1Ny2WRcwzxb3fmEmx3381jfP5LiS5pNn9JLUnEEvSc0Z9JLUnEEvSc0Z9JLU3JZcdZPkeuAjwHnAx6vq1q04zk+qWV3tA17xo601y/f2rDwff6c2/Yw+yXnAnwFvBF4JvCPJKzf7OJKkjdmKM/qrgKNV9RhAks8ANwDf3IJj6Xn2k3jGJc27rVijvxz4zqr9Y8OYJGkGtuKMPmuM1Y9NSvYAe4bd/0zy6JTHuwT47pTPPVd17Al69tWxJ+jZ1znZU24b9fSf38ikrQj6Y8AVq/Z3AE+eOqmq9gH7xh4syXJVLY19nXNJx56gZ18de4KefXXsaaO2Yunmn4CdSa5Mcj7wduCeLTiOJGkDNv2MvqqeS/K7wJeYXF75iap6ZLOPI0namC25jr6qvgh8cSteew2jl3/OQR17gp59dewJevbVsacNSdWP/Z5UktSIX4EgSc3NbdAnuT7Jo0mOJtk763rORpJPJDmR5OFVYxcluS/JkWF74TCeJB8d+vxGktfOrvLTS3JFkvuTHE7ySJKbh/F57+tFSb6a5OtDX38yjF+Z5ODQ12eHCw9I8sJh/+jw+OIs6z+TJOcleSDJvcN+h54eT/JQkgeTLA9jc/0e3AxzGfQNvmbhk8D1p4ztBQ5U1U7gwLAPkx53Drc9wMeepxrP1nPA+6vqFcDVwHuGP5N57+t/gG
ur6lXAq4Hrk1wN3AbcPvT1NLB7mL8beLqqfgG4fZh3rroZOLxqv0NPAL9ZVa9edSnlvL8Hx6uqubsBrwO+tGr/FuCWWdd1lj0sAg+v2n8U2D7c3w48Otz/c+Ada807l2/A3cBvdeoLeAnwNeBXmXzwZtsw/qP3I5OrzV433N82zMusa1+jlx1MQu9a4F4mH3Sc656G+h4HLjllrM17cNrbXJ7R0/NrFi6rquMAw/bSYXzueh3+af8a4CAN+hqWOB4ETgD3Ad8Gnqmq54Ypq2v/UV/D488CFz+/FW/IHcAHgB8O+xcz/z3B5FP4f5fk0PDpe2jwHhxrXv9z8A19zUITc9VrkpcBfw28t6q+n6xV/mTqGmPnZF9V9X/Aq5NcAPwN8Iq1pg3bc76vJG8BTlTVoSSvPzm8xtS56WmVa6rqySSXAvcl+dYZ5s5TX6PM6xn9hr5mYc48lWQ7wLA9MYzPTa9JXsAk5D9VVZ8fhue+r5Oq6hngK0x+B3FBkpMnSqtr/1Ffw+M/C3zv+a10XdcAb03yOPAZJss3dzDfPQFQVU8O2xNMfihfRaP34LTmNeg7fs3CPcCu4f4uJmvcJ8ffNVwhcDXw7Ml/hp5LMjl1vxM4XFUfXvXQvPe1MJzJk+TFwBuY/ALzfuCmYdqpfZ3s9ybgyzUsAJ8rquqWqtpRVYtM/u58uareyRz3BJDkpUl++uR94LeBh5nz9+CmmPUvCaa9AW8C/pnJeukfzbqes6z908Bx4AdMzip2M1nzPAAcGbYXDXPD5AqjbwMPAUuzrv80Pf06k3/2fgN4cLi9qUFfvwI8MPT1MPDHw/jLga8CR4G/Al44jL9o2D86PP7yWfewTn+vB+7t0NNQ/9eH2yMnc2He34ObcfOTsZLU3Lwu3UiSNsigl6TmDHpJas6gl6TmDHpJas6gl6TmDHpJas6gl6Tm/h8gyLdRgbyIwAAAAABJRU5ErkJggg==\n",
|
|
"text/plain": [
|
|
"<matplotlib.figure.Figure at 0x7fb4b5329320>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"# Sanity check: histogram of the per-user weights.\n",
"plt.hist(list(user_weights.values()))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[('dandelion@google.com', 569.9801637172158),\n",
|
|
" ('jart@google.com', 545.0875820097374),\n",
|
|
" ('zeng.chi@gmail.com', 474.4941786809108),\n",
|
|
" ('gardener@tensorflow.org', 472.63110233817844),\n",
|
|
" ('smilkov@google.com', 434.80813797887964),\n",
|
|
" ('wchargin@gmail.com', 393.3191133762563),\n",
|
|
" ('nicholsonc@google.com', 335.9915566820012),\n",
|
|
" ('nsthorat@google.com', 233.81765373473377),\n",
|
|
" ('nobody@tensorflow.org', 202.53648716913858),\n",
|
|
" ('dsmilkov@gmail.com', 131.64270699045093)]"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Accuracy check, given that the operator is familiar with the repo:\n",
"# the ten users with the largest weights, largest first.\n",
"sorted(user_weights.items(), key=lambda item: item[1], reverse=True)[:10]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Build a JSON object to send to the frontend: the file-to-commits\n",
"# mapping, plus the author email and per-file stats of every commit.\n",
"results = {\n",
"    \"fileToCommits\": data['file_to_commits'],\n",
"    \"commits\": dict(\n",
"        (c.hexsha, {\"author\": c.author.email, \"stats\": c.stats.files})\n",
"        for c in data['commits'].values()\n",
"    ),\n",
"}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Write the results as JSON for the frontend.\n",
"frontend_data_path = '/tmp/data.json'\n",
"with open(frontend_data_path, 'w') as outfile:\n",
"    json.dump(results, outfile)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|