sourcecred/experiments/01_commit_graph_dump.ipynb

375 lines
22 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring the commit graph\n",
"\n",
"(2018-02-08)\n",
"\n",
"This script crawls a repository's commit graph by inspecting each file\n",
"currently in the repository and looking at all commits that have changed\n",
"that file (tracking across renames). The data is lightly processed and\n",
"serialized to JSON to be displayed by a frontend."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"import datetime\n",
"import json\n",
"import math\n",
"import os\n",
"import subprocess\n",
"import tempfile\n",
"\n",
"import git\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class RepoContext:\n",
"    \"\"\"Crawl a git repository for the commits that touched its files.\n",
"\n",
"    Per-file commit lists are computed with one `git log --follow` per\n",
"    file, so they are cached as JSON in the system temp directory. The\n",
"    cache is keyed by the repository path and is only reused while the\n",
"    repository's HEAD commit is unchanged.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, repo_root):\n",
"        \"\"\"Create a context for the git repository at `repo_root`.\"\"\"\n",
"        self._repo_root = repo_root\n",
"\n",
"    def _tempfile_name(self):\n",
"        \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n",
"        # Encode the path to the repo in a format that can be used in paths.\n",
"        sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n",
"        filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n",
"        return os.path.join(tempfile.gettempdir(), filename)\n",
"\n",
"    def _git_cmd(self, args):\n",
"        \"\"\"Run git(1) with the specified arguments; return its stdout.\n",
"\n",
"        The output is returned as `bytes`. Raises\n",
"        `subprocess.CalledProcessError` if git exits with nonzero status.\n",
"        \"\"\"\n",
"        return subprocess.check_output(['git', '-C', self._repo_root] + args)\n",
"\n",
"    def hashes_for_file(self, current_filename):\n",
"        \"\"\"List the SHAs of all commits that touched `current_filename`.\n",
"\n",
"        History is followed across renames (`git log --follow`). Hashes\n",
"        are returned as `str`s, in the order that `git log` prints them.\n",
"        \"\"\"\n",
"        assert current_filename, current_filename\n",
"        # Obviously really inefficient to spawn a subprocess for each file\n",
"        # in the repo, but the `git` module doesn't seem to have an easy way\n",
"        # to `--follow`... if productionized, this could just use libgit2\n",
"        # bindings for performance.\n",
"        #\n",
"        # The `--` keeps git from reading a filename that starts with `-`,\n",
"        # or that coincides with a ref name, as an option or a revision.\n",
"        hashes = self._git_cmd(\n",
"            ['log', '--pretty=%H', '--follow', '--', current_filename]\n",
"        ).splitlines()\n",
"        return [h.decode('ascii') for h in hashes]\n",
"\n",
"    def all_files(self):\n",
"        \"\"\"Get a list of all paths to files in the git repo.\n",
"\n",
"        Assumes that filenames are UTF-8--encoded, which seems reasonable.\n",
"        Alternately, could return a list of `bytes` objects.\n",
"        \"\"\"\n",
"        file_bytestring_names = self._git_cmd([\n",
"            'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n",
"        ]).split(b'\\0')[:-1]  # strip trailing '\\0'\n",
"        return [fn.decode('utf-8') for fn in file_bytestring_names]\n",
"\n",
"    def _load_cached_files(self, head):\n",
"        \"\"\"Read the file-to-hashes map from the on-disk cache, if usable.\n",
"\n",
"        Returns the cached dictionary when the cache file exists, parses\n",
"        as JSON, and was written for this repository at commit `head`.\n",
"        Returns `None` otherwise, so that the caller rebuilds the cache\n",
"        instead of failing on a stale or foreign cache file.\n",
"        \"\"\"\n",
"        try:\n",
"            with open(self._tempfile_name()) as infile:\n",
"                result = json.load(infile)\n",
"        except (OSError, ValueError):\n",
"            # Missing or unreadable file, or malformed JSON\n",
"            # (`json.decoder.JSONDecodeError` is a `ValueError`).\n",
"            return None\n",
"        if not isinstance(result, dict):\n",
"            return None\n",
"        if result.get('base') != self._repo_root:\n",
"            print('Cache for wrong repo: expected %r, got %r' %\n",
"                  (self._repo_root, result.get('base')))\n",
"            return None\n",
"        if result.get('head') != head:\n",
"            print('Cache for wrong HEAD commit: expected %r, got %r'\n",
"                  % (head, result.get('head')))\n",
"            return None\n",
"        files = result.get('files')\n",
"        return files if isinstance(files, dict) else None\n",
"\n",
"    def load_commit_data(self):\n",
"        \"\"\"Load data about the relationships between commits and files.\n",
"\n",
"        A commit is _file-reachable_ if it touches a file that currently\n",
"        exists in the repository, even if that file might have been renamed\n",
"        (but not if it has been deleted).\n",
"\n",
"        Returns a dictionary `r` such that\n",
"          - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n",
"            path to a file currently tracked in the repository, then `d[f]`\n",
"            is the list of commit SHAs for all commits that have touched `f`\n",
"            (tracking across renames of `f`);\n",
"          - `r['files']` is a list of all files currently in the repository\n",
"            (and is the keyset of `r['file_to_commits']`);\n",
"          - `r['hashes']` is a `frozenset` of the hashes of all\n",
"            file-reachable commits (and is the union of the values of\n",
"            `r['file_to_commits']`);\n",
"          - `r['commits']` is a dictionary `d` such that if `h` is the hash\n",
"            of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n",
"            descriptor for the commit with hash `h`; and\n",
"          - `r['repo']` is a gitpython `Repo` object for the repository.\n",
"\n",
"        The per-file commit lists are read from the on-disk cache when it\n",
"        matches this repository and its current HEAD; otherwise they are\n",
"        recomputed and the cache file is overwritten.\n",
"        \"\"\"\n",
"        print('Starting: %s' % self._repo_root)\n",
"        all_files = self.all_files()\n",
"        print('Got %s files' % len(all_files))\n",
"\n",
"        repo = git.Repo(self._repo_root)\n",
"        head = repo.commit().hexsha\n",
"\n",
"        # `files[x] = cs` where `cs` is the list of hashes that touched the\n",
"        # file now known as `x`\n",
"        print('Loading file database')\n",
"        files = self._load_cached_files(head)\n",
"        if files is not None:\n",
"            print('Loaded file database')\n",
"        else:\n",
"            # Build the cache\n",
"            print('Compiling file database')\n",
"            files = {fn: self.hashes_for_file(fn) for fn in all_files}\n",
"            print('Compiled file database')\n",
"            cache = {\n",
"                'base': self._repo_root,\n",
"                'head': head,\n",
"                'files': files,\n",
"            }\n",
"            print('Created cache for file database')\n",
"            with open(self._tempfile_name(), 'w') as outfile:\n",
"                json.dump(cache, outfile)\n",
"            print('Dumped cache for file database')\n",
"\n",
"        all_hashes = frozenset().union(*files.values())\n",
"        print('Got %s hashes' % len(all_hashes))\n",
"\n",
"        # `commits[h] == c` s.t. `c.hash == h`\n",
"        commits = {h: repo.commit(h) for h in all_hashes}\n",
"        print('Got %s commits' % len(commits))\n",
"\n",
"        return {\n",
"            'file_to_commits': files,\n",
"            'hashes': all_hashes,\n",
"            'files': all_files,\n",
"            'commits': commits,\n",
"            'repo': repo,\n",
"        }"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting: /home/dandelion/git/tensorboard\n",
"Got 698 files\n",
"Loading file database\n",
"Loaded file database\n",
"Got 1323 hashes\n",
"Got 1323 commits\n"
]
}
],
"source": [
"# Crawl the local tensorboard checkout and collect its commit/file data.\n",
"data = RepoContext(repo_root='/home/dandelion/git/tensorboard').load_commit_data()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Which commits are important?\n",
"def _commit_weight(commit):\n",
"    \"\"\"Score a commit as `log1p` of the `lines` total in its stats.\"\"\"\n",
"    changed_lines = commit.stats.total['lines']\n",
"    return math.log1p(changed_lines)\n",
"\n",
"# Map each commit hash to its weight.\n",
"commit_weights = {\n",
"    sha: _commit_weight(commit) for sha, commit in data['commits'].items()\n",
"}\n",
"weight_values = list(commit_weights.values())\n",
"weight_values.sort()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 1., 31., 0., 122., 59., 84., 79., 82., 85., 78., 71.,\n",
" 84., 63., 61., 118., 71., 71., 55., 39., 16., 11., 6.,\n",
" 13., 5., 9., 1., 2., 1., 2., 3.]),\n",
" array([ 0. , 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n",
" 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n",
" 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n",
" 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n",
" 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n",
" 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n",
" 10.53582259]),\n",
" <a list of 30 Patch objects>)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADqpJREFUeJzt3X+snmV9x/H3Z1RUMAZYDwRbsoNJp7IlBnNCUBJDqMlQCOUPSSDOdYykWcIUnYsU9wf/zKRmxh/LFpYG0JoRkFUWiDAn6yBkf9Dt8CMKFEIDrByp9BgHOjXDzu/+OHeTQzntOX3u5+E5z9X3K2me+76e637u7522n3P1eu77aqoKSVK7fmvcBUiSRsugl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDVuzbgLAFi7dm1NT0+PuwxJmiiPPPLIT6pqarl+qyLop6enmZ2dHXcZkjRRkvzXSvo5dSNJjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3LJBn+TWJAeSPLGo7a+TPJ3kB0n+Kckpi967IcneJM8k+YNRFS5JWpmVPBn7TeBvgW8tarsfuKGqDib5EnADcH2Sc4Argd8D3gX8a5Lfrar/G27Zq8P01ntX1O+FbZeMuBJJOrJlR/RV9RDw08Pavl9VB7vdh4H13fYm4I6q+t+qeh7YC5w3xHolScdoGHP0fwL8c7e9Dnhx0XtzXZskaUx6BX2SvwQOArcdalqiWx3h2C1JZpPMzs/P9ylDknQUAwd9ks3ApcAnqupQmM8BZy3qth54aanjq2p7Vc1U1czU1LKrbEqSBjRQ0Ce5GLgeuKyqfrnorXuAK5O8NcnZwAbgP/qXKUka1LJ33SS5HbgQWJtkDriRhbts3grcnwTg4ar606p6MsmdwFMsTOlc2+odN5I0KZYN+qq6aonmW47S/4vAF/sUJY3aSm+NBW+P1eTzyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJatyyQZ/k1iQHkjyxqO20JPcnebZ7PbVrT5K/SbI3yQ+SfGCUxUuSlreSEf03gYsPa9sK7KqqDcCubh/go8CG7tcW4KbhlClJGtSyQV9VDwE/Pax5E7Cj294BXL6o/Vu14GHglCRnDqtYSdKxG3SO/oyq2g/QvZ7eta8DXlzUb65rkySNybC/jM0SbbVkx2RLktkks/Pz80MuQ5J0yKBB//KhKZnu9UDXPgectajfeuClpT6gqrZX1UxVzUxNTQ1YhiRpOYMG/T3A5m57M3D3ovY/6u6+OR949dAUjyRpPNYs1yHJ7cCFwNokc8CNwDbgziTXAPuAK7ru9wEfA/YCvwSuHkHNkqRjsGzQV9VVR3hr4xJ9C7i2b1GSpOHxyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxq0ZdwFaHaa33ruifi9su2TElUgaNoN+FTkew/Z4vGbpzebUjSQ1zqCXpMb1Cvokn03yZJInktye5G1Jzk6yO8mzSb6d5MRhFStJOnYDz9EnWQd8Gjinqn6V5E7gSuBjwFer6o4kfw9cA9w0lGp1TFY6/y2pbX2nbtYAb0+yBjgJ2A9cBOzs3t8BXN7zHJKkHgYO+qr6EfBlYB8LAf8q8AjwSlUd7LrNAev6FilJGtzAQZ/kVGATcDbwLuBk4KNLdK0jHL8lyWyS2fn5+UHLkCQto8/UzUeA56tqvqp+DdwFfAg4pZvKAVgPvLTUwVW1vapmqm
pmamqqRxmSpKPp88DUPuD8JCcBvwI2ArPAA8DHgTuAzcDdfYvU6/klq6RjMXDQV9XuJDuBR4GDwGPAduBe4I4kf9W13TKMQrU6+CSrNHl6LYFQVTcCNx7W/BxwXp/PlSQNj0/GSlLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWpcryUQpCNpaeG1YV+L6wDpzeaIXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXGuXqmJsNIVJF0ZUnqjXiP6JKck2Znk6SR7knwwyWlJ7k/ybPd66rCKlSQdu75TN18HvldV7wXeD+wBtgK7qmoDsKvblySNycBTN0neCXwY+GOAqnoNeC3JJuDCrtsO4EHg+j5FSivV0n94Ig1LnxH9u4F54BtJHktyc5KTgTOqaj9A93r6EOqUJA2oT9CvAT4A3FRV5wK/4BimaZJsSTKbZHZ+fr5HGZKko+kT9HPAXFXt7vZ3shD8Lyc5E6B7PbDUwVW1vapmqmpmamqqRxmSpKMZeI6+qn6c5MUk76mqZ4CNwFPdr83Atu717qFUOsGcN5Y0Tn3vo/8UcFuSE4HngKtZ+FfCnUmuAfYBV/Q8hySph15BX1WPAzNLvLWxz+dKkobHJRAkqXEGvSQ1zqCXpMa5qJn0JnOBNr3ZHNFLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa530Cc5IcljSb7b7Z+dZHeSZ5N8O8mJ/cuUJA1qGCP664A9i/a/BHy1qjYA/w1cM4RzSJIG1Cvok6wHLgFu7vYDXATs7LrsAC7vcw5JUj99R/RfAz4P/Kbb/23glao62O3PAet6nkOS1MPAQZ/kUuBAVT2yuHmJrnWE47ckmU0yOz8/P2gZkqRl9BnRXwBcluQF4A4Wpmy+BpySZE3XZz3w0lIHV9X2qpqpqpmpqakeZUiSjmbN8l2WVlU3ADcAJLkQ+Iuq+kSSfwQ+zkL4bwbuHkKd0nFneuu9K+r3wrZLRlyJJt0o7qO/HvjzJHtZmLO/ZQTnkCSt0MAj+sWq6kHgwW77OeC8YXyuJKk/n4yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1LihLIEwKVwkStLxyBG9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5KwkDyTZk+TJJNd17acluT/Js93rqcMrV5J0rPqM6A8Cn6uq9wHnA9cmOQfYCuyqqg3Arm5fkjQmAwd9Ve2vqke77Z8De4B1wCZgR9dtB3B53yIlSYMbyhx9kmngXGA3cEZV7YeFHwbA6cM4hyRpML2DPsk7gO8An6mqnx3DcVuSzCaZnZ+f71uGJOkIegV9krewEPK3VdVdXfPLSc7s3j8TOLDUsVW1vapmqmpmamqqTxmSpKPoc9dNgFuAPVX1lUVv3QNs7rY3A3cPXp4kqa81PY69APgk8MMkj3dtXwC2AXcmuQbYB1zRr0RJUh8DB31V/TuQI7y9cdDPlSQNl0/GSlLjDHpJalyfOXpJq8D01ntX1O+FbZeMuBKtVo7oJalxBr0kNc6gl6TGGfSS1Di/jJU0ML8IngyO6CWpcQa9JDXOqRvpOOE0y/HLEb0kNc6gl6TGGfSS1Djn6CWtGn6PMBqO6CWpcQa9JDXOoJekxhn0ktQ4v4yV9Dor/UJUk8OglzRy/vAYL6duJKlxBr0kNc6gl6TGOUcvSSOwmp7yHVnQJ7kY+DpwAnBzVW0b1bkkaSnDDttJ/VJ5JFM3SU4A/g74KHAOcFWSc0ZxLknS0Y1qRH
8esLeqngNIcgewCXhq2Cea1J+wkgY37L/3refIqL6MXQe8uGh/rmuTJL3JRjWizxJt9boOyRZgS7f7P0meGfBca4G
"text/plain": [
"<matplotlib.figure.Figure at 0x7fb4b53296a0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sanity check: distribution of per-commit weights.\n",
"plt.hist(list(commit_weights.values()), bins=30)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Which users are important?\n",
"# Map alternate author addresses onto one canonical address per person.\n",
"email_resolutions = {'danmane@gmail.com': 'dandelion@google.com'}\n",
"# A user's weight is the sum of the weights of the commits they authored.\n",
"user_weights = collections.defaultdict(float)\n",
"for h in data['commits']:\n",
"    nominal_email = data['commits'][h].author.email\n",
"    email = email_resolutions.get(nominal_email, nominal_email)\n",
"    user_weights[email] += commit_weights[h]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([104., 5., 2., 1., 1., 1., 1., 1., 2., 2.]),\n",
" array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n",
" 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n",
" 456.12276041, 513.05146206, 569.98016372]),\n",
" <a list of 10 Patch objects>)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADeZJREFUeJzt3WuMXHd5x/HvrzHh2ja3TeTGUTeoVguqykWrNDRVRRNahYtIXgQJhIqFLPkNbUNBAqeVivoukSoSkCpUi1BcCXFpSpUooNLIBFV9UdM1CSTBpDZpGty48SKS0IvUkvL0xRyjlVl713N2M56H70canTn/+c+c55HHvz3+75lxqgpJUl8/NesCJElby6CXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqbtusCwC45JJLanFxcdZlSNJcOXTo0HeramG9eedE0C8uLrK8vDzrMiRpriT5143Mc+lGkpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpo7Jz4ZO8bi3i/M7NiP3/rmmR1bkjbKM3pJas6gl6TmDHpJam7doE/yiSQnkjy8auyiJPclOTJsLxzGk+SjSY4m+UaS125l8ZKk9W3kjP6TwPWnjO0FDlTVTuDAsA/wRmDncNsDfGxzypQkTWvdoK+qvwe+d8rwDcD+4f5+4MZV439ZE/8IXJBk+2YVK0k6e9Ou0V9WVccBhu2lw/jlwHdWzTs2jEmSZmSzfxmbNcZqzYnJniTLSZZXVlY2uQxJ0knTBv1TJ5dkhu2JYfwYcMWqeTuAJ9d6garaV1VLVbW0sLDuf3koSZrStEF/D7BruL8LuHvV+LuGq2+uBp49ucQjSZqNdb8CIcmngdcDlyQ5BnwIuBX4XJLdwBPA24bpXwTeBBwF/ht49xbULEk6C+sGfVW94zQPXbfG3ALeM7YoSdLm8ZOxktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktTcqKBP8gdJHknycJJPJ3lRkiuTHExyJMlnk5y/WcVKks7e1EGf5HLg94Glqvpl4Dzg7cBtwO1VtRN4Gti9GYVKkqYzdulmG/DiJNuAlwDHgWuBu4bH9wM3jjyGJGmEqYO+qv4N+FPgCSYB/yxwCHimqp4bph0DLh9bpCRpemOWbi4EbgCuBH4OeCnwxjWm1mmevyfJcpLllZWVacuQJK1jzNLNG4B/qaqVqvoB8Hng14ALhqUcgB3Ak2s9uar2VdVSVS0tLCyMKEOSdCZjgv4J4OokL0kS4Drgm8D9wE3DnF3A3eNKlCSNMWaN/iCTX7p+DXhoeK19wAeB9yU5ClwM3LkJdUqSprRt/SmnV1UfAj50yvBjwFVjXleStHn8ZKwkNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNTcq6JNckOSuJN9KcjjJ65JclOS+JEeG7YWbVawk6eyNPaP/CPC3VfVLwKuAw8Be4EBV7QQODPuSpBmZOuiT/AzwG8CdAFX1v1X1DHADsH+Yth+4cWyRkqTpjTmjfzmwAvxFkgeSfDzJS4HLquo4wLC9dK0nJ9mTZDnJ8srKyogyJElnMibotwGvBT5WVa8B/ouzWKapqn1VtVRVSwsLCyPKkCSdyZigPwYcq6qDw/5dTIL/qSTbAYbtiXElSp
LGmDroq+rfge8k+cVh6Drgm8A9wK5hbBdw96gKJUmjbBv5/N8DPpXkfOAx4N1Mfnh8Lslu4AngbSOPIUkaYVTQV9WDwNIaD1035nUlSZvHT8ZKUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Nzrok5yX5IEk9w77VyY5mORIks8mOX98mZKkaW3GGf3NwOFV+7cBt1fVTuBpYPcmHEOSNKVRQZ9kB/Bm4OPDfoBrgbuGKfuBG8ccQ5I0ztgz+juADwA/HPYvBp6pqueG/WPA5SOPIUkaYeqgT/IW4ERVHVo9vMbUOs3z9yRZTrK8srIybRmSpHWMOaO/BnhrkseBzzBZsrkDuCDJtmHODuDJtZ5cVfuqaqmqlhYWFkaUIUk6k6mDvqpuqaodVbUIvB34clW9E7gfuGmYtgu4e3SVkqSpbcV19B8E3pfkKJM1+zu34BiSpA3atv6U9VXVV4CvDPcfA67ajNeVJI3nJ2MlqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqbmpgz7JFUnuT3I4ySNJbh7GL0pyX5Ijw/bCzStXknS2xpzRPwe8v6peAVwNvCfJK4G9wIGq2gkcGPYlSTMyddBX1fGq+tpw/z+Aw8DlwA3A/mHafuDGsUVKkqa3KWv0SRaB1wAHgcuq6jhMfhgAl27GMSRJ0xkd9EleBvw18N6q+v5ZPG9PkuUkyysrK2PLkCSdxqigT/ICJiH/qar6/DD8VJLtw+PbgRNrPbeq9lXVUlUtLSwsjClDknQGY666CXAncLiqPrzqoXuAXcP9XcDd05cnSRpr24jnXgP8DvBQkgeHsT8EbgU+l2Q38ATwtnElSpLGmDroq+ofgJzm4eumfV1J0ubyk7GS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1Ny2WRcwzxb3fmEmx3381jfP5LiS5pNn9JLUnEEvSc0Z9JLUnEEvSc0Z9JLU3JZcdZPkeuAjwHnAx6vq1q04zk+qWV3tA17xo601y/f2rDwff6c2/Yw+yXnAnwFvBF4JvCPJKzf7OJKkjdmKM/qrgKNV9RhAks8ANwDf3IJj6Xn2k3jGJc27rVijvxz4zqr9Y8OYJGkGtuKMPmuM1Y9NSvYAe4bd/0zy6JTHuwT47pTPPVd17Al69tWxJ+jZ1znZU24b9fSf38ikrQj6Y8AVq/Z3AE+eOqmq9gH7xh4syXJVLY19nXNJx56gZ18de4KefXXsaaO2Yunmn4CdSa5Mcj7wduCeLTiOJGkDNv2MvqqeS/K7wJeYXF75iap6ZLOPI0namC25jr6qvgh8cSteew2jl3/OQR17gp59dewJevbVsacNSdWP/Z5UktSIX4EgSc3NbdAnuT7Jo0mOJtk763rORpJPJDmR5OFVYxcluS/JkWF74TCeJB8d+vxGktfOrvLTS3JFkvuTHE7ySJKbh/F57+tFSb6a5OtDX38yjF+Z5ODQ12eHCw9I8sJh/+jw+OIs6z+TJOcleSDJvcN+h54eT/JQkgeTLA9jc/0e3AxzGfQNvmbhk8D1p4ztBQ5U1U7gwLAPkx53Drc9wMeepxrP1nPA+6vqFcDVwHuGP5N57+t/gG
ur6lXAq4Hrk1wN3AbcPvT1NLB7mL8beLqqfgG4fZh3rroZOLxqv0NPAL9ZVa9edSnlvL8Hx6uqubsBrwO+tGr/FuC
"text/plain": [
"<matplotlib.figure.Figure at 0x7fb4b5329320>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sanity check: distribution of per-user weights.\n",
"plt.hist(list(user_weights.values()))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"[('dandelion@google.com', 569.9801637172158),\n",
" ('jart@google.com', 545.0875820097374),\n",
" ('zeng.chi@gmail.com', 474.4941786809108),\n",
" ('gardener@tensorflow.org', 472.63110233817844),\n",
" ('smilkov@google.com', 434.80813797887964),\n",
" ('wchargin@gmail.com', 393.3191133762563),\n",
" ('nicholsonc@google.com', 335.9915566820012),\n",
" ('nsthorat@google.com', 233.81765373473377),\n",
" ('nobody@tensorflow.org', 202.53648716913858),\n",
" ('dsmilkov@gmail.com', 131.64270699045093)]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Accuracy check, given that the operator is familiar with the repo:\n",
"# the ten heaviest users, as (email, weight) pairs, heaviest first.\n",
"sorted(user_weights.items(), key=lambda pair: pair[1], reverse=True)[:10]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Build a JSON object to send to the frontend: the file-to-commits map,\n",
"# plus the author email and per-file stats of every commit, keyed by hash.\n",
"results = {\n",
"    'fileToCommits': data['file_to_commits'],\n",
"    'commits': {\n",
"        commit.hexsha: {\n",
"            'author': commit.author.email,\n",
"            'stats': commit.stats.files,\n",
"        }\n",
"        for commit in data['commits'].values()\n",
"    },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Write the frontend payload to disk as JSON.\n",
"with open('/tmp/data.json', 'w') as outfile:\n",
"    outfile.write(json.dumps(results))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}