Check in experimental commit graph traversal (#8)

Summary:
This is a minimal script that will crawl the file-reachable commits in a
repository and dump statistics to a JSON file that can be further
analyzed interactively on a frontend.

Requires PyPI `gitpython` (and `matplotlib`, but this can be excised).
Requires `jupyter` to run.

(Mostly paired with @dandelionmane on 2018-02-08.)

wchargin-branch: experiment-commit-traversal
This commit is contained in:
William Chargin 2018-02-15 16:11:38 -08:00 committed by Dandelion Mané
parent 84ad9d57bf
commit 03198b0c86
1 changed files with 368 additions and 0 deletions

View File

@ -0,0 +1,368 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploring the commit graph\n",
"\n",
"(2018-02-08)\n",
"\n",
"This script crawls a repository's commit graph by inspecting each file\n",
"currently in the repository and looking at all commits that have changed\n",
"that file (tracking over renames). The data is lightly processed and is\n",
"serialized to JSON to be displayed by a frontend."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"import datetime\n",
"import json\n",
"import math\n",
"import os\n",
"import subprocess\n",
"import tempfile\n",
"\n",
"import git\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class RepoContext:\n",
"\n",
" def __init__(self, repo_root):\n",
" self._repo_root = repo_root\n",
" \n",
" def _tempfile_name(self):\n",
" \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n",
" # Encode the path to the repo in a format that can be used in paths.\n",
" sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n",
" filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n",
" return os.path.join(tempfile.gettempdir(), filename)\n",
"\n",
" def _git_cmd(self, args):\n",
" \"\"\"Run git(1) with the specified arguments; return its stdout.\"\"\"\n",
" return subprocess.check_output(['git', '-C', self._repo_root] + args)\n",
"\n",
" def hashes_for_file(self, current_filename):\n",
" assert current_filename, current_filename\n",
" # Obviously really inefficient to spawn a subprocess for each file\n",
" # in the repo, but the `git` module doesn't seem to have an easy way\n",
" # to `--follow`... if productionized, this could just use libgit2\n",
" # bindings for performance.\n",
" hashes = (\n",
" self._git_cmd(['log', '--pretty=%H', '--follow', current_filename])\n",
" ).splitlines()\n",
" return [h.decode('ascii') for h in hashes]\n",
"\n",
" def all_files(self):\n",
" \"\"\"Get a list of all paths to files in the git repo.\n",
" \n",
" Assumes that filenames are UTF-8--encoded, which seems reasonable.\n",
" Alternately, could return a list of `bytes` objects.\n",
" \"\"\"\n",
" file_bytestring_names = self._git_cmd([\n",
" 'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n",
" ]).split(b'\\0')[:-1] # strip trailing '\\0'\n",
" return [fn.decode('utf-8') for fn in file_bytestring_names]\n",
"\n",
" def load_commit_data(self):\n",
" \"\"\"Load data about the relationships between commits and files.\n",
"\n",
" A commit is _file-reachable_ if it touches a file that currently\n",
" exists in the repository, even if that file might have been renamed\n",
" (but not if it has been deleted).\n",
"\n",
" Returns a dictionary `r` such that\n",
" - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n",
" path to a file currently tracked in the repository, then `d[f]`\n",
" is the list of commit SHAs for all commits that have touched `f`\n",
" (tracking across renames of `f`);\n",
" - `r['files']` is a list of all files currently in the repository\n",
" (and is the keyset of `r['file_to_commits']`);\n",
" - `r['hashes']` is a frozenset of hashes of all file-reachable commits\n",
" (and is the union of the values of `r['file_to_commits']`);\n",
" - `r['commits']` is a dictionary `d` such that if `h` is the hash\n",
" of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n",
" descriptor for the commit with hash `h`; and\n",
" - `r['repo']` is a gitpython `Repo` object for the repository.\n",
" \"\"\"\n",
" print('Starting: %s' % self._repo_root)\n",
" all_files = self.all_files()\n",
" print('Got %s files' % len(all_files))\n",
"\n",
" repo = git.Repo(self._repo_root)\n",
" head = repo.commit().hexsha\n",
"\n",
" # `files[x] = cs` where `cs` is the list of hashes that touched the\n",
" # file now known as `x`\n",
" try:\n",
" print('Loading file database')\n",
" with open(self._tempfile_name()) as infile:\n",
" result = json.load(infile)\n",
" assert result['base'] == self._repo_root, (\n",
" 'Cache for wrong repo: expected %r, got %r' %\n",
" (self._repo_root, result['base']))\n",
" assert result['head'] == head, (\n",
" 'Cache for wrong HEAD commit: expected %r, got %r'\n",
" % (head, result['head']))\n",
" files = result['files']\n",
" print('Loaded file database')\n",
" except (OSError, json.decoder.JSONDecodeError):\n",
" # Build the cache\n",
" print('Compiling file database')\n",
" files = {fn: self.hashes_for_file(fn) for fn in all_files}\n",
" print('Compiled file database')\n",
" cache = {\n",
" 'base': self._repo_root,\n",
" 'head': head,\n",
" 'files': files,\n",
" }\n",
" print('Created cache for file database')\n",
" with open(self._tempfile_name(), 'w') as outfile:\n",
" json.dump(cache, outfile)\n",
" print('Dumped cache for file database')\n",
"\n",
" all_hashes = frozenset().union(*files.values())\n",
" print('Got %s hashes' % len(all_hashes))\n",
"\n",
" # `commits[h] == c` s.t. `c.hexsha == h`\n",
" commits = {h: repo.commit(h) for h in all_hashes}\n",
" print('Got %s commits' % len(commits))\n",
" \n",
" return {\n",
" 'file_to_commits': files,\n",
" 'hashes': all_hashes,\n",
" 'files': all_files,\n",
" 'commits': commits,\n",
" 'repo': repo,\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting: /home/wchargin/git/tensorboard\n",
"Got 636 files\n",
"Loading file database\n",
"Loaded file database\n",
"Got 1223 hashes\n",
"Got 1223 commits\n"
]
}
],
"source": [
"data = RepoContext('/home/wchargin/git/tensorboard').load_commit_data()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Which commits are important?\n",
"def _commit_weight(commit):\n",
" return math.log1p(commit.stats.total['lines'])\n",
"\n",
"commit_weights = {\n",
" h: _commit_weight(data['commits'][h]) for h in data['commits']\n",
"}\n",
"weight_values = sorted(commit_weights.values())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 1., 30., 0., 112., 55., 79., 73., 76., 78., 68., 63.,\n",
" 77., 55., 58., 107., 68., 70., 50., 38., 15., 9., 6.,\n",
" 13., 5., 8., 1., 2., 1., 2., 3.]),\n",
" array([ 0. , 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n",
" 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n",
" 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n",
" 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n",
" 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n",
" 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n",
" 10.53582259]),\n",
" <a list of 30 Patch objects>)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADZ5JREFUeJzt3V2oXeWdx/Hvb0yl1ULV5hDSROYEGlqkUJSD2AmUYnphJ6XJRRHLTCdIIDdOa1+gTXvjbYTS1oFBCGqbYUQrqRCp0hlJLWUuJvREZXxJi8FGTSYxp7TatwEr/c/FWcJpPMk5Z6+93Wc/5/uBsNd69nr5r7z8zrOfvZ6VVBWSpHb9zbgLkCSNlkEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJaty6cRcAsH79+pqenh53GZI0UY4dO/brqppaartVEfTT09PMzs6OuwxJmihJXlrOdg7dSFLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS41bFzNhJNb3v0WVtd3L/jhFXIkkXZo9ekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DgnTGlNcrKb1hJ79JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGLRn0Se5Lci7JswvarkryeJIXutcru/Yk+ZckJ5L8T5LrRlm8JGlpy+nRfx+46by2fcCRqtoKHOnWAT4FbO1+7QXuHk6ZkqRBLRn0VfUz4DfnNe8EDnbLB4FdC9r/reb9N3BFko3DKlaStHKDjtFvqKoz3fJZYEO3vAl4ZcF2p7o2SdKY9P4ytqoKqJXul2Rvktkks3Nzc33LkCRdwKBB/+pbQzLd67mu/TRw9YLtNndtb1NVB6pqpqpmpqamBixDkrSUQYP+EWB3t7wbOLyg/Z+6u29uAF5fMMQjSRqDJZ9Hn+QB4BPA+iSngDuA/cBDSfYALwE3d5s/Bvw9cAL4E3DrCGqWJK3AkkFfVZ+7wFvbF9m2gNv6FiVJGh5nxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxvYI+yZeTPJfk2SQPJHl3ki1JjiY5keQHSS4dVrGSpJUbOOiTbAK+CMxU1UeAS4BbgDuB71TVB4HfAnuGUagkaTB9h27WAe9Jsg64DDgD3Agc6t4/COzqeQ5JUg8DB31VnQa+BbzMfMC/DhwDXquqN7vNTgGb+hYpSRpcn6GbK4GdwBbgA8DlwE0r2H9vktkks3Nzc4OWIUlaQp+hm08Cv6qquar6M/AwsA24ohvKAdgMnF5s56o6UFUzVTUzNTXVowxJ0sWsW3qTC3oZuCHJZcD/AduBWeAJ4LPAg8Bu4HDfIteK6X2PLmu7k/t3jLgSSS3pM0Z/lPkvXZ8EnumOdQD4OvCVJCeA9wP3DqFOSdKA+vToqao7gDvOa34RuL7PcfXOWu4nCfDThDSJegW91JfDVdLo+QgESWqcPfqGrWRIRlK77NFLUuMMeklqnEEvSY0z6CWpcQa9JDXOu24mkHfTSFoJe/SS1DiDXpIaZ9BLUuMMeklqnF/GSkPiA9q0Wtmjl6TGGfSS1DiDXpIa5xi9VmStjUM7OU0tsEcvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapwTpjQSTjSSVg979JLUuF5Bn+SKJIeS/CLJ8SQfS3JVkseTvNC9XjmsYiVJK9e3R38X8OOq+jDwUeA4sA84UlVbgSPduiRpTAYO+iTvAz4O3AtQVW9U1WvATuBgt9lBYFffIiVJg+vTo98CzAHfS/
JUknuSXA5sqKoz3TZngQ19i5QkDa5P0K8DrgPurqprgT9y3jBNVRVQi+2cZG+S2SSzc3NzPcqQJF1Mn6A/BZyqqqPd+iHmg//VJBsButdzi+1cVQeqaqaqZqampnqUIUm6mIGDvqrOAq8k+VDXtB14HngE2N217QYO96pQktRL3wlTXwDuT3Ip8CJwK/M/PB5Ksgd4Cbi55zkkST30CvqqehqYWeSt7X2OKw3KGbnS2zkzVpIaZ9BLUuMMeklqnEEvSY3zMcXvAL8g7M/fQ2lw9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjXPClPQOW+7kr5P7d4y4Eq0V9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuN6B32SS5I8leRH3fqWJEeTnEjygySX9i9TkjSoYfTobweOL1i/E/hOVX0Q+C2wZwjnkCQNqFfQJ9kM7ADu6dYD3Agc6jY5COzqcw5JUj99e/TfBb4G/KVbfz/wWlW92a2fAjb1PIckqYeBgz7Jp4FzVXVswP33JplNMjs3NzdoGZKkJfTp0W8DPpPkJPAg80M2dwFXJHnr/6LdDJxebOeqOlBVM1U1MzU11aMMSdLFDBz0VfWNqtpcVdPALcBPquofgCeAz3ab7QYO965SkjSwUdxH/3XgK0lOMD9mf+8IziFJWqZ1S2+ytKr6KfDTbvlF4PphHFday6b3Pbqs7U7u3zHiSjTpnBkrSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuKE8vXISLPdJgODTACW1xR69JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5OokTyR5PslzSW7v2q9K8niSF7rXK4dXriRppfr06N8EvlpV1wA3ALcluQbYBxypqq3AkW5dkjQmAwd9VZ2pqie75d8Dx4FNwE7gYLfZQWBX3yIlSYMbyhh9kmngWuAosKGqznRvnQU2DOMckqTB9A76JO8Ffgh8qap+t/C9qiqgLrDf3iSzSWbn5ub6liFJuoBeQZ/kXcyH/P1V9XDX/GqSjd37G4Fzi+1bVQeqaqaqZqampvqUIUm6iD533QS4FzheVd9e8NYjwO5ueTdwePDyJEl9reux7zbg88AzSZ7u2r4J7AceSrIHeAm4uV+JkqQ+Bg76qvovIBd4e/ugx5UkDZczYyWpcQa9JDXOoJekxhn0ktS4PnfdSFoFpvc9uqztTu7fMeJKtFrZo5ekxhn0ktQ4g16SGucYvaSBLPe7AfD7gXGzRy9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIa5+2V0hrhoxLWLnv0ktQ4e/SSVg0/dYyGPXpJapxBL0mNM+glqXGO0Uv6Kyt5WJkmgz16SWqcPXpJI+enhPGyRy9JjTPoJalxDt1I0gispslf9uglqXEj6dEnuQm4C7gEuKeq9o/iPJJ0McPuVU/ql8pD79EnuQT4V+BTwDXA55JcM+zzSJKWZxQ9+uuBE1X1IkCSB4GdwPMjONfE/oSVNLhh/7tvPUdGMUa/CXhlwfqprk2SNAZju+smyV5gb7f6hyS/HPBQ64FfD6eqeblzmEcbmqFf5yrldbZjLVwj9LzOnnnzt8vZaBRBfxq4esH65q7tr1TVAeBA35Mlma2qmb7HWe28zrashetcC9cIk3Gdoxi6+TmwNcmWJJcCtwCPjOA8kqRlGHqPvqreTPLPwH8wf3vlfVX13LDPI0lanpGM0VfVY8Bjozj2InoP/0wIr7Mta+E618I1wgRcZ6pq3DVIkkbIRyBIUuMmOuiT3JTkl0lOJNk37npGIcnVSZ5I8nyS55LcPu6aRiXJJUmeSvKjcdcyKkmuSHIoyS
+SHE/ysXHXNApJvtz9fX02yQNJ3j3umoYhyX1JziV5dkHbVUkeT/JC93rlOGtczMQG/Rp61MKbwFer6hrgBuC2Rq8T4Hbg+LiLGLG7gB9X1YeBj9Lg9SbZBHwRmKmqjzB/U8Yt461qaL4P3HRe2z7gSFVtBY5066vKxAY9Cx61UFVvAG89aqEpVXWmqp7sln/PfDA0N9M4yWZgB3DPuGsZlSTvAz4O3AtQVW9U1WvjrWpk1gHvSbIOuAz43zHXMxRV9TPgN+c17wQOdssHgV3vaFHLMMlBv+YetZBkGrgWODreSkbiu8DXgL+Mu5AR2gLMAd/rhqjuSXL5uIsatqo6DXwLeBk4A7xeVf853qpGakNVnemWzwIbxlnMYiY56NeUJO8Ffgh8qap+N+56hinJp4FzVXVs3LWM2DrgOuDuqroW+COr8GN+X90Y9U7mf7B9ALg8yT+Ot6p3Rs3fxrjqbmWc5KBf1qMWWpDkXcyH/P1V9fC46xmBbcBnkpxkfgjuxiT/Pt6SRuIUcKqq3vpEdoj54G/NJ4FfVdVcVf0ZeBj4uzHXNEqvJtkI0L2eG3M9bzPJQb8mHrWQJMyP6R6vqm+Pu55RqKpvVNXmqppm/s/xJ1XVXA+wqs4CryT5UNe0nRE9vnvMXgZuSHJZ9/d3Ow1+6bzAI8Dubnk3cHiMtSxqYv/P2DX0qIVtwOeBZ5I83bV9s5t9rMnzBeD+rnPyInDrmOsZuqo6muQQ8CTzd409xQTMHl2OJA8AnwDWJzkF3AHsBx5Ksgd4Cbh5fBUuzpmxktS4SR66kSQtg0EvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1Lj/h+tOhhCt7WwHAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f2089a264a8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sanity check...\n",
"plt.hist(commit_weights.values(), bins=30)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Which users are important?\n",
"user_weights = collections.defaultdict(lambda: 0.0)\n",
"email_resolutions = {\n",
" 'danmane@gmail.com': 'dandelion@google.com',\n",
"}\n",
"for h in data['commits']:\n",
" nominal_email = data['commits'][h].author.email\n",
" email = email_resolutions.get(nominal_email, nominal_email)\n",
" user_weights[email] += commit_weights[h]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([95., 4., 1., 1., 1., 1., 2., 1., 1., 2.]),\n",
" array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n",
" 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n",
" 456.12276041, 513.05146206, 569.98016372]),\n",
" <a list of 10 Patch objects>)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADMlJREFUeJzt3F+MpfVdx/H3R6aUQk35N25Wljg0EBtiLDQThNCYCmqwNIUL0tA0ujGb7E1Vapu0iyY23kFiSjExjZtS3QtSqBQDoU0rLvTCm62zQMufLbLFpWWzsFMDVL3Qrv16cR5wxF3mzM6ZPXO+vl/JyZznOc/M+f7g7HuffWbOpKqQJM2+n5n2AJKkyTDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKamDuVT3b++efXwsLCqXxKSZp5+/fv/1FVza923CkN+sLCAktLS6fyKSVp5iV5YZzjvOQiSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTZzSd4qux8Kur03leQ/ddv1UnleS1sozdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1MRYQU/yh0meTvJUki8nOSPJRUn2JTmY5N4kp2/0sJKkE1s16EkuAP4AWKyqXwJOA24GbgfuqKqLgVeAHRs5qCTprY17yWUOeEeSOeBM4AhwDXDf8Pge4MbJjydJGteqQa+qw8CfAT9gFPLXgP3Aq1V1bDjsReCC431+kp1JlpIsLS8vT2ZqSdL/Mc4ll3OAG4CLgJ8HzgKuG/cJqmp3VS1W1eL8/PxJDypJemvjXHL5deCfq2q5qn4C3A9cDZw9XIIB2AYc3qAZJUljGCfoPwCuTHJmkgDXAs8AjwI3DcdsBx7YmBElSeMY5xr6Pkbf/HwMeHL4nN3AZ4BPJjkInAfctYFzSpJWMbf6IVBVnwU++6bdzwNXTHwiSdJJ8Z2iktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6Qmxgp6krOT3Jfke0kOJLkqyblJHk7y3PDxnI0eVpJ0YuOeod8JfKOq3gO8FzgA7AL2VtUlwN5hW5I0JasGPcm7gF8F7gKoqv+sqleBG4A9w2F7gBs3akhJ0urGOUO/CFgG/irJ40m+mOQsYEtVHRmOeQnYslFDSpJWN07Q54D3AV+oqsuBf+dNl1eqqoA63icn2ZlkKcnS8vLyeueVJJ3AOEF/EXixqvYN2/cxCvzLSbYCDB+PHu+Tq2p3VS1W1eL8/PwkZpYkHceqQa+ql4AfJvnFYde1wDPAg8D2Yd924IENmVCSNJa5MY/7feDuJKcDzwO/y+gvg68k2QG8AHxkY0aUJI1jrKBX1RPA4nEeunay40iSTpbvFJWkJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJsYOe5LQkjyd5aNi+KMm+JAeT3Jvk9I0bU5K0mrWcod8CHFixfTtwR1VdDLwC7JjkYJKktRkr6Em2AdcDXxy2A1wD3Dccsg
e4cSMGlCSNZ9wz9M8DnwZ+OmyfB7xaVceG7ReBCyY8myRpDVYNepIPAUerav/JPEGSnUmWkiwtLy+fzJeQJI1hnDP0q4EPJzkE3MPoUsudwNlJ5oZjtgGHj/fJVbW7qharanF+fn4CI0uSjmfVoFfVrVW1raoWgJuBR6rqY8CjwE3DYduBBzZsSknSqtbzc+ifAT6Z5CCja+p3TWYkSdLJmFv9kP9RVd8CvjXcfx64YvIjSZJOhu8UlaQmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqYlVg57kwiSPJnkmydNJbhn2n5vk4STPDR/P2fhxJUknMs4Z+jHgU1V1KXAl8PEklwK7gL1VdQmwd9iWJE3JqkGvqiNV9dhw/1+BA8AFwA3AnuGwPcCNGzWkJGl1a7qGnmQBuBzYB2ypqiPDQy8BW07wOTuTLCVZWl5eXseokqS3MnbQk7wT+Crwiar68crHqqqAOt7nVdXuqlqsqsX5+fl1DStJOrGxgp7kbYxifndV3T/sfjnJ1uHxrcDRjRlRkjSOcX7KJcBdwIGq+tyKhx4Etg/3twMPTH48SdK45sY45mrgt4Enkzwx7Psj4DbgK0l2AC8AH9mYESVJ41g16FX1D0BO8PC1kx1HknSyfKeoJDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhNz0x5gs1vY9bWpPfeh266f2nNLmj2eoUtSEwZdkpow6JLUhNfQpSmb1vdp/j9+j6b7f+t1naEnuS7Js0kOJtk1qaEkSWt30kFPchrwF8BvAZcCH01y6aQGkyStzXouuVwBHKyq5wGS3APcADwzicE03R+ZVH++vvpZzyWXC4Afrth+cdgnSZqCDf+maJKdwM5h89+SPHuSX+p84EeTmWrT6Lgm6LmujmuCnuvadGvK7ev+Er8wzkHrCfph4MIV29uGff9LVe0Gdq/jeQBIslRVi+v9OptJxzVBz3V1XBP0XFfHNY1rPZdc/hG4JMlFSU4HbgYenMxYkqS1Oukz9Ko6luT3gG8CpwFfqqqnJzaZJGlN1nUNvaq+Dnx9QrOsZt2XbTahjmuCnuvquCboua6OaxpLqmraM0iSJsDf5SJJTWz6oM/yrxdI8qUkR5M8tWLfuUkeTvLc8PGcYX+S/Pmwzu8med/0Jj+xJBcmeTTJM0meTnLLsH9m15XkjCTfTvKdYU1/Ouy/KMm+YfZ7h2/+k+Ttw/bB4fGFac6/miSnJXk8yUPD9kyvK8mhJE8meSLJ0rBvZl9/k7Spg97g1wv8NXDdm/btAvZW1SXA3mEbRmu8ZLjtBL5wimZcq2PAp6rqUuBK4OPD/5NZXtd/ANdU1XuBy4DrklwJ3A7cUVUXA68AO4bjdwCvDPvvGI7bzG4BDqzY7rCuX6uqy1b8eOIsv/4mp6o27Q24Cvjmiu1bgVunPdca17AAPLVi+1lg63B/K/DscP8vgY8e77jNfAMeAH6jy7qAM4HHgF9h9OaUuWH/G69FRj/ZddVwf244LtOe/QTr2cYocNcADwGZ9XUBh4Dz37SvxetvvbdNfYZOz18vsKWqjgz3XwK2DPdnbq3DP8kvB/Yx4+saLks8ARwFHga+D7xaVceGQ1bO/caahsdfA847tROP7fPAp4GfDtvnMfvrKuDvkuwf3okOM/76mxR/H/oUVVUlmckfM0ryTuCrwCeq6sdJ3nhsFtdVVf
8FXJbkbOBvgfdMeaR1S/Ih4GhV7U/ygWnPM0Hvr6rDSX4OeDjJ91Y+OIuvv0nZ7GfoY/16gRnzcpKtAMPHo8P+mVlrkrcxivndVXX/sHvm1wVQVa8CjzK6FHF2ktdPelbO/caahsffBfzLKR51HFcDH05yCLiH0WWXO5nxdVXV4eHjUUZ/+V5Bk9ffem32oHf89QIPAtuH+9sZXYN+ff/vDN+VvxJ4bcU/ITeNjE7F7wIOVNXnVjw0s+tKMj+cmZPkHYy+J3CAUdhvGg5785peX+tNwCM1XKDdTKrq1qraVlULjP7sPFJVH2OG15XkrCQ/+/p94DeBp5jh199ETfsi/mo34IPAPzG6pvnH055njbN/GTgC/ITRtbsdjK5J7gWeA/4eOHc4Nox+ouf7wJPA4rTnP8Ga3s/oGuZ3gSeG2wdneV3ALwOPD2t6CviTYf+7gW8DB4G/Ad4+7D9j2D44PP7uaa9hjDV+AHho1tc1zP6d4fb0602Y5dffJG++U1SSmtjsl1wkSWMy6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1IT/w2ZJDu/GsgFSgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f2087770240>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sanity check...\n",
"plt.hist(user_weights.values())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"[('dandelion@google.com', 569.980163717216),\n",
" ('jart@google.com', 524.5859947976339),\n",
" ('gardener@tensorflow.org', 472.63110233817844),\n",
" ('smilkov@google.com', 434.80813797887964),\n",
" ('wchargin@gmail.com', 393.3191133762563),\n",
" ('zeng.chi@gmail.com', 359.13737400821606),\n",
" ('nicholsonc@google.com', 335.99155668200115),\n",
" ('nsthorat@google.com', 233.81765373473374),\n",
" ('nobody@tensorflow.org', 202.53648716913855),\n",
" ('dsmilkov@gmail.com', 131.64270699045096)]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Accuracy check, given that the operator is familiar with the repo...\n",
"[(k, user_weights[k]) for k in sorted(user_weights, key=user_weights.get, reverse=True)][:10]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Build a JSON object to send to the frontend.\n",
"results = {\n",
" \"file_to_commits\": data['file_to_commits'],\n",
" \"commits\": {\n",
" c.hexsha: {\n",
" \"author\": c.author.email,\n",
" \"stats\": c.stats.files,\n",
" }\n",
" for c in data['commits'].values()\n",
" },\n",
" \"authors\": sorted(frozenset(c.author.email for c in data['commits'].values())),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"with open('/tmp/data.json', 'w') as outfile:\n",
" json.dump(results, outfile)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}