diff --git a/experiments/01_commit_graph_dump.ipynb b/experiments/01_commit_graph_dump.ipynb new file mode 100644 index 0000000..f7aad5f --- /dev/null +++ b/experiments/01_commit_graph_dump.ipynb @@ -0,0 +1,368 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exploring the commit graph\n", + "\n", + "(2018-02-08)\n", + "\n", + "This script crawls a repository's commit graph by inspecting files\n", + "currently in the repository and looking at all commits that have changed\n", + "that file (tracking over renames). The data is lightly processed and is\n", + "serialized to JSON to be displayed by a frontend." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import collections\n", + "import datetime\n", + "import json\n", + "import math\n", + "import os\n", + "import subprocess\n", + "import tempfile\n", + "\n", + "import git\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class RepoContext:\n", + "\n", + " def __init__(self, repo_root):\n", + " self._repo_root = repo_root\n", + " \n", + " def _tempfile_name(self):\n", + " \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n", + " # Encode the path to the repo in a format that can be used in paths.\n", + " sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n", + " filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n", + " return os.path.join(tempfile.gettempdir(), filename)\n", + "\n", + " def _git_cmd(self, args):\n", + " \"\"\"Run git(1) with the specified arguments; return its stdout.\"\"\"\n", + " return subprocess.check_output(['git', '-C', self._repo_root] + args)\n", + "\n", + " def hashes_for_file(self, current_filename):\n", + " assert current_filename, current_filename\n", + " # Obviously really inefficient to spawn a subprocess for each file\n", + " # in the repo, but 
the `git` module doesn't seem to have an easy way\n", + " # to `--follow`... if productionized, this could just use libgit2\n", + " # bindings for performance.\n", + " hashes = (\n", + " self._git_cmd(['log', '--pretty=%H', '--follow', current_filename])\n", + " ).splitlines()\n", + " return [h.decode('ascii') for h in hashes]\n", + "\n", + " def all_files(self):\n", + " \"\"\"Get a list of all paths to files in the git repo.\n", + " \n", + " Assumes that filenames are UTF-8--encoded, which seems reasonable.\n", + " Alternately, could return a list of `bytes` objects.\n", + " \"\"\"\n", + " file_bytestring_names = self._git_cmd([\n", + " 'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n", + " ]).split(b'\\0')[:-1] # strip trailing '\\0'\n", + " return [fn.decode('utf-8') for fn in file_bytestring_names]\n", + "\n", + " def load_commit_data(self):\n", + " \"\"\"Load data about the relationships between commits and files.\n", + "\n", + " A commit is _file-reachable_ if it touches a file that currently\n", + " exists in the repository, even if that file might have been renamed\n", + " (but not if it has been deleted).\n", + "\n", + " Returns a dictionary `r` such that\n", + " - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n", + " path to a file currently tracked in the repository, then `d[f]`\n", + " is the list of commit SHAs for all commits that have touched `f`\n", + " (tracking across renames of `f`);\n", + " - `r['files']` is a list of all files currently in the repository\n", + " (and is the keyset of `r['file_to_commits']`);\n", + " - `r['hashes']` is a frozenset of hashes of all file-reachable commits\n", + " (and is the union of the values of `r['file_to_commits']`);\n", + " - `r['commits']` is a dictionary `d` such that if `h` is the hash\n", + " of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n", + " descriptor for the commit with hash `h`; and\n", + " - `r['repo']` is a gitpython `Repo` object for the 
repository.\n", + " \"\"\"\n", + " print('Starting: %s' % self._repo_root)\n", + " all_files = self.all_files()\n", + " print('Got %s files' % len(all_files))\n", + "\n", + " repo = git.Repo(self._repo_root)\n", + " head = repo.commit().hexsha\n", + "\n", + " # `files[x] = cs` where `cs` is the list of hashes that touched the\n", + " # file now known as `x`\n", + " try:\n", + " print('Loading file database')\n", + " with open(self._tempfile_name()) as infile:\n", + " result = json.load(infile)\n", + " assert result['base'] == self._repo_root, (\n", + " 'Cache for wrong repo: expected %r, got %r' %\n", + " (self._repo_root, result['base']))\n", + " assert result['head'] == head, (\n", + " 'Cache for wrong HEAD commit: expected %r, got %r'\n", + " % (head, result['head']))\n", + " files = result['files']\n", + " print('Loaded file database')\n", + " except (OSError, json.decoder.JSONDecodeError):\n", + " # Build the cache\n", + " print('Compiling file database')\n", + " files = {fn: self.hashes_for_file(fn) for fn in all_files}\n", + " print('Compiled file database')\n", + " cache = {\n", + " 'base': self._repo_root,\n", + " 'head': head,\n", + " 'files': files,\n", + " }\n", + " print('Created cache for file database')\n", + " with open(self._tempfile_name(), 'w') as outfile:\n", + " json.dump(cache, outfile)\n", + " print('Dumped cache for file database')\n", + "\n", + " all_hashes = frozenset().union(*files.values())\n", + " print('Got %s hashes' % len(all_hashes))\n", + "\n", + " # `commits[h] == c` s.t. 
`c.hash == h`\n", + " commits = {h: repo.commit(h) for h in all_hashes}\n", + " print('Got %s commits' % len(commits))\n", + " \n", + " return {\n", + " 'file_to_commits': files,\n", + " 'hashes': all_hashes,\n", + " 'files': all_files,\n", + " 'commits': commits,\n", + " 'repo': repo,\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting: /home/wchargin/git/tensorboard\n", + "Got 636 files\n", + "Loading file database\n", + "Loaded file database\n", + "Got 1223 hashes\n", + "Got 1223 commits\n" + ] + } + ], + "source": [ + "data = RepoContext('/home/wchargin/git/tensorboard').load_commit_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Which commits are important?\n", + "def _commit_weight(commit):\n", + " return math.log1p(commit.stats.total['lines'])\n", + "\n", + "commit_weights = {\n", + " h: _commit_weight(data['commits'][h]) for h in data['commits']\n", + "}\n", + "weight_values = sorted(commit_weights.values())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 1., 30., 0., 112., 55., 79., 73., 76., 78., 68., 63.,\n", + " 77., 55., 58., 107., 68., 70., 50., 38., 15., 9., 6.,\n", + " 13., 5., 8., 1., 2., 1., 2., 3.]),\n", + " array([ 0. 
, 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n", + " 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n", + " 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n", + " 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n", + " 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n", + " 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n", + " 10.53582259]),\n", + " )" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADZ5JREFUeJzt3V2oXeWdx/Hvb0yl1ULV5hDSROYEGlqkUJSD2AmUYnphJ6XJRRHLTCdIIDdOa1+gTXvjbYTS1oFBCGqbYUQrqRCp0hlJLWUuJvREZXxJi8FGTSYxp7TatwEr/c/FWcJpPMk5Z6+93Wc/5/uBsNd69nr5r7z8zrOfvZ6VVBWSpHb9zbgLkCSNlkEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJaty6cRcAsH79+pqenh53GZI0UY4dO/brqppaartVEfTT09PMzs6OuwxJmihJXlrOdg7dSFLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS41bFzNhJNb3v0WVtd3L/jhFXIkkXZo9ekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DgnTGlNcrKb1hJ79JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGLRn0Se5Lci7JswvarkryeJIXutcru/Yk+ZckJ5L8T5LrRlm8JGlpy+nRfx+46by2fcCRqtoKHOnWAT4FbO1+7QXuHk6ZkqRBLRn0VfUz4DfnNe8EDnbLB4FdC9r/reb9N3BFko3DKlaStHKDjtFvqKoz3fJZYEO3vAl4ZcF2p7o2SdKY9P4ytqoKqJXul2Rvktkks3Nzc33LkCRdwKBB/+pbQzLd67mu/TRw9YLtNndtb1NVB6pqpqpmpqamBixDkrSUQYP+EWB3t7wbOLyg/Z+6u29uAF5fMMQjSRqDJZ9Hn+QB4BPA+iSngDuA/cBDSfYALwE3d5s/Bvw9cAL4E3DrCGqWJK3AkkFfVZ+7wFvbF9m2gNv6FiVJGh5nxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxvYI+yZeTPJfk2SQPJHl3ki1JjiY5keQHSS4dVrGSpJUbOOiTbAK+CMxU1UeAS4BbgDuB71TVB4HfAnuGUagkaTB9h27WAe9Jsg64DDgD3Agc6t4/COzqeQ5JUg8DB31VnQa+BbzMfMC/DhwDXquqN7vNTgGb+hYpSRpcn6GbK4GdwBbgA8DlwE0r2H9vktkks3Nzc4OWIUlaQp+hm08Cv
6qquar6M/AwsA24ohvKAdgMnF5s56o6UFUzVTUzNTXVowxJ0sWsW3qTC3oZuCHJZcD/AduBWeAJ4LPAg8Bu4HDfIteK6X2PLmu7k/t3jLgSSS3pM0Z/lPkvXZ8EnumOdQD4OvCVJCeA9wP3DqFOSdKA+vToqao7gDvOa34RuL7PcfXOWu4nCfDThDSJegW91JfDVdLo+QgESWqcPfqGrWRIRlK77NFLUuMMeklqnEEvSY0z6CWpcQa9JDXOu24mkHfTSFoJe/SS1DiDXpIaZ9BLUuMMeklqnF/GSkPiA9q0Wtmjl6TGGfSS1DiDXpIa5xi9VmStjUM7OU0tsEcvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapwTpjQSTjSSVg979JLUuF5Bn+SKJIeS/CLJ8SQfS3JVkseTvNC9XjmsYiVJK9e3R38X8OOq+jDwUeA4sA84UlVbgSPduiRpTAYO+iTvAz4O3AtQVW9U1WvATuBgt9lBYFffIiVJg+vTo98CzAHfS/JUknuSXA5sqKoz3TZngQ19i5QkDa5P0K8DrgPurqprgT9y3jBNVRVQi+2cZG+S2SSzc3NzPcqQJF1Mn6A/BZyqqqPd+iHmg//VJBsButdzi+1cVQeqaqaqZqampnqUIUm6mIGDvqrOAq8k+VDXtB14HngE2N217QYO96pQktRL3wlTXwDuT3Ip8CJwK/M/PB5Ksgd4Cbi55zkkST30CvqqehqYWeSt7X2OKw3KGbnS2zkzVpIaZ9BLUuMMeklqnEEvSY3zMcXvAL8g7M/fQ2lw9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjXPClPQOW+7kr5P7d4y4Eq0V9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuN6B32SS5I8leRH3fqWJEeTnEjygySX9i9TkjSoYfTobweOL1i/E/hOVX0Q+C2wZwjnkCQNqFfQJ9kM7ADu6dYD3Agc6jY5COzqcw5JUj99e/TfBb4G/KVbfz/wWlW92a2fAjb1PIckqYeBgz7Jp4FzVXVswP33JplNMjs3NzdoGZKkJfTp0W8DPpPkJPAg80M2dwFXJHnr/6LdDJxebOeqOlBVM1U1MzU11aMMSdLFDBz0VfWNqtpcVdPALcBPquofgCeAz3ab7QYO965SkjSwUdxH/3XgK0lOMD9mf+8IziFJWqZ1S2+ytKr6KfDTbvlF4PphHFday6b3Pbqs7U7u3zHiSjTpnBkrSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuKE8vXISLPdJgODTACW1xR69JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5OokTyR5PslzSW7v2q9K8niSF7rXK4dXriRppfr06N8EvlpV1wA3ALcluQbYBxypqq3AkW5dkjQmAwd9VZ2pqie75d8Dx4FNwE7gYLfZQWBX3yIlSYMbyhh9kmngWuAosKGqznRvnQU2DOMckqTB9A76JO8Ffgh8qap+t/C9qiqgLrDf3iSzSWbn5ub6liFJuoBeQZ/kXcyH/P1V9XDX/GqSjd37G4Fzi+1bVQeqaqaqZqampvqUIUm6iD533QS4FzheVd9e8NYjwO5ueTdwePDyJEl9reux7zbg88AzSZ7u2r4J7AceSrIHeAm4uV+JkqQ+Bg76qvovIBd4e/ugx5UkDZczYyWpcQa9JDXOoJekxhn0ktS4PnfdSFoFpvc9uqztTu7fMeJKtFrZo5ekxhn0ktQ4g16SGucYvaSBLPe7AfD7gXGzRy9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIa5+2V0hrhoxLWLnv0k
tQ4e/SSVg0/dYyGPXpJapxBL0mNM+glqXGO0Uv6Kyt5WJkmgz16SWqcPXpJI+enhPGyRy9JjTPoJalxDt1I0gispslf9uglqXEj6dEnuQm4C7gEuKeq9o/iPJJ0McPuVU/ql8pD79EnuQT4V+BTwDXA55JcM+zzSJKWZxQ9+uuBE1X1IkCSB4GdwPMjONfE/oSVNLhh/7tvPUdGMUa/CXhlwfqprk2SNAZju+smyV5gb7f6hyS/HPBQ64FfD6eqeblzmEcbmqFf5yrldbZjLVwj9LzOnnnzt8vZaBRBfxq4esH65q7tr1TVAeBA35Mlma2qmb7HWe28zrashetcC9cIk3Gdoxi6+TmwNcmWJJcCtwCPjOA8kqRlGHqPvqreTPLPwH8wf3vlfVX13LDPI0lanpGM0VfVY8Bjozj2InoP/0wIr7Mta+E618I1wgRcZ6pq3DVIkkbIRyBIUuMmOuiT3JTkl0lOJNk37npGIcnVSZ5I8nyS55LcPu6aRiXJJUmeSvKjcdcyKkmuSHIoyS+SHE/ysXHXNApJvtz9fX02yQNJ3j3umoYhyX1JziV5dkHbVUkeT/JC93rlOGtczMQG/Rp61MKbwFer6hrgBuC2Rq8T4Hbg+LiLGLG7gB9X1YeBj9Lg9SbZBHwRmKmqjzB/U8Yt461qaL4P3HRe2z7gSFVtBY5066vKxAY9Cx61UFVvAG89aqEpVXWmqp7sln/PfDA0N9M4yWZgB3DPuGsZlSTvAz4O3AtQVW9U1WvjrWpk1gHvSbIOuAz43zHXMxRV9TPgN+c17wQOdssHgV3vaFHLMMlBv+YetZBkGrgWODreSkbiu8DXgL+Mu5AR2gLMAd/rhqjuSXL5uIsatqo6DXwLeBk4A7xeVf853qpGakNVnemWzwIbxlnMYiY56NeUJO8Ffgh8qap+N+56hinJp4FzVXVs3LWM2DrgOuDuqroW+COr8GN+X90Y9U7mf7B9ALg8yT+Ot6p3Rs3fxrjqbmWc5KBf1qMWWpDkXcyH/P1V9fC46xmBbcBnkpxkfgjuxiT/Pt6SRuIUcKqq3vpEdoj54G/NJ4FfVdVcVf0ZeBj4uzHXNEqvJtkI0L2eG3M9bzPJQb8mHrWQJMyP6R6vqm+Pu55RqKpvVNXmqppm/s/xJ1XVXA+wqs4CryT5UNe0nRE9vnvMXgZuSHJZ9/d3Ow1+6bzAI8Dubnk3cHiMtSxqYv/P2DX0qIVtwOeBZ5I83bV9s5t9rMnzBeD+rnPyInDrmOsZuqo6muQQ8CTzd409xQTMHl2OJA8AnwDWJzkF3AHsBx5Ksgd4Cbh5fBUuzpmxktS4SR66kSQtg0EvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1Lj/h+tOhhCt7WwHAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Sanity check...\n", + "plt.hist(commit_weights.values(), bins=30)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Which users are important?\n", + "user_weights = collections.defaultdict(lambda: 0.0)\n", + "email_resolutions = {\n", + " 'danmane@gmail.com': 'dandelion@google.com',\n", + "}\n", + "for h in data['commits']:\n", + " nominal_email = data['commits'][h].author.email\n", + " email = 
email_resolutions.get(nominal_email, nominal_email)\n", + " user_weights[email] += commit_weights[h]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([95., 4., 1., 1., 1., 1., 2., 1., 1., 2.]),\n", + " array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n", + " 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n", + " 456.12276041, 513.05146206, 569.98016372]),\n", + " )" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADMlJREFUeJzt3F+MpfVdx/H3R6aUQk35N25Wljg0EBtiLDQThNCYCmqwNIUL0tA0ujGb7E1Vapu0iyY23kFiSjExjZtS3QtSqBQDoU0rLvTCm62zQMufLbLFpWWzsFMDVL3Qrv16cR5wxF3mzM6ZPXO+vl/JyZznOc/M+f7g7HuffWbOpKqQJM2+n5n2AJKkyTDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKamDuVT3b++efXwsLCqXxKSZp5+/fv/1FVza923CkN+sLCAktLS6fyKSVp5iV5YZzjvOQiSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTZzSd4qux8Kur03leQ/ddv1UnleS1sozdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1MRYQU/yh0meTvJUki8nOSPJRUn2JTmY5N4kp2/0sJKkE1s16EkuAP4AWKyqXwJOA24GbgfuqKqLgVeAHRs5qCTprY17yWUOeEeSOeBM4AhwDXDf8Pge4MbJjydJGteqQa+qw8CfAT9gFPLXgP3Aq1V1bDjsReCC431+kp1JlpIsLS8vT2ZqSdL/Mc4ll3OAG4CLgJ8HzgKuG/cJqmp3VS1W1eL8/PxJDypJemvjXHL5deCfq2q5qn4C3A9cDZw9XIIB2AYc3qAZJUljGCfoPwCuTHJmkgDXAs8AjwI3DcdsBx7YmBElSeMY5xr6Pkbf/HwMeHL4nN3AZ4BPJjkInAfctYFzSpJWMbf6IVBVnwU++6bdzwNXTHwiSdJJ8Z2iktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6Qmxgp6krOT3Jfke0kOJLkqyblJHk7y3PDxnI0eVpJ0YuOeod8JfKOq3gO8FzgA7AL2VtUlwN5hW5I0JasGPcm
7gF8F7gKoqv+sqleBG4A9w2F7gBs3akhJ0urGOUO/CFgG/irJ40m+mOQsYEtVHRmOeQnYslFDSpJWN07Q54D3AV+oqsuBf+dNl1eqqoA63icn2ZlkKcnS8vLyeueVJJ3AOEF/EXixqvYN2/cxCvzLSbYCDB+PHu+Tq2p3VS1W1eL8/PwkZpYkHceqQa+ql4AfJvnFYde1wDPAg8D2Yd924IENmVCSNJa5MY/7feDuJKcDzwO/y+gvg68k2QG8AHxkY0aUJI1jrKBX1RPA4nEeunay40iSTpbvFJWkJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJsYOe5LQkjyd5aNi+KMm+JAeT3Jvk9I0bU5K0mrWcod8CHFixfTtwR1VdDLwC7JjkYJKktRkr6Em2AdcDXxy2A1wD3Dccsge4cSMGlCSNZ9wz9M8DnwZ+OmyfB7xaVceG7ReBCyY8myRpDVYNepIPAUerav/JPEGSnUmWkiwtLy+fzJeQJI1hnDP0q4EPJzkE3MPoUsudwNlJ5oZjtgGHj/fJVbW7qharanF+fn4CI0uSjmfVoFfVrVW1raoWgJuBR6rqY8CjwE3DYduBBzZsSknSqtbzc+ifAT6Z5CCja+p3TWYkSdLJmFv9kP9RVd8CvjXcfx64YvIjSZJOhu8UlaQmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqYlVg57kwiSPJnkmydNJbhn2n5vk4STPDR/P2fhxJUknMs4Z+jHgU1V1KXAl8PEklwK7gL1VdQmwd9iWJE3JqkGvqiNV9dhw/1+BA8AFwA3AnuGwPcCNGzWkJGl1a7qGnmQBuBzYB2ypqiPDQy8BW07wOTuTLCVZWl5eXseokqS3MnbQk7wT+Crwiar68crHqqqAOt7nVdXuqlqsqsX5+fl1DStJOrGxgp7kbYxifndV3T/sfjnJ1uHxrcDRjRlRkjSOcX7KJcBdwIGq+tyKhx4Etg/3twMPTH48SdK45sY45mrgt4Enkzwx7Psj4DbgK0l2AC8AH9mYESVJ41g16FX1D0BO8PC1kx1HknSyfKeoJDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhNz0x5gs1vY9bWpPfeh266f2nNLmj2eoUtSEwZdkpow6JLUhNfQpSmb1vdp/j9+j6b7f+t1naEnuS7Js0kOJtk1qaEkSWt30kFPchrwF8BvAZcCH01y6aQGkyStzXouuVwBHKyq5wGS3APcADwzicE03R+ZVH++vvpZzyWXC4Afrth+cdgnSZqCDf+maJKdwM5h89+SPHuSX+p84EeTmWrT6Lgm6LmujmuCnuvadGvK7ev+Er8wzkHrCfph4MIV29uGff9LVe0Gdq/jeQBIslRVi+v9OptJxzVBz3V1XBP0XFfHNY1rPZdc/hG4JMlFSU4HbgYenMxYkqS1Oukz9Ko6luT3gG8CpwFfqqqnJzaZJGlN1nUNvaq+Dnx9QrOsZt2XbTahjmuCnuvquCboua6OaxpLqmraM0iSJsDf5SJJTWz6oM/yrxd
I8qUkR5M8tWLfuUkeTvLc8PGcYX+S/Pmwzu8med/0Jj+xJBcmeTTJM0meTnLLsH9m15XkjCTfTvKdYU1/Ouy/KMm+YfZ7h2/+k+Ttw/bB4fGFac6/miSnJXk8yUPD9kyvK8mhJE8meSLJ0rBvZl9/k7Spg97g1wv8NXDdm/btAvZW1SXA3mEbRmu8ZLjtBL5wimZcq2PAp6rqUuBK4OPD/5NZXtd/ANdU1XuBy4DrklwJ3A7cUVUXA68AO4bjdwCvDPvvGI7bzG4BDqzY7rCuX6uqy1b8eOIsv/4mp6o27Q24Cvjmiu1bgVunPdca17AAPLVi+1lg63B/K/DscP8vgY8e77jNfAMeAH6jy7qAM4HHgF9h9OaUuWH/G69FRj/ZddVwf244LtOe/QTr2cYocNcADwGZ9XUBh4Dz37SvxetvvbdNfYZOz18vsKWqjgz3XwK2DPdnbq3DP8kvB/Yx4+saLks8ARwFHga+D7xaVceGQ1bO/caahsdfA847tROP7fPAp4GfDtvnMfvrKuDvkuwf3okOM/76mxR/H/oUVVUlmckfM0ryTuCrwCeq6sdJ3nhsFtdVVf8FXJbkbOBvgfdMeaR1S/Ih4GhV7U/ygWnPM0Hvr6rDSX4OeDjJ91Y+OIuvv0nZ7GfoY/16gRnzcpKtAMPHo8P+mVlrkrcxivndVXX/sHvm1wVQVa8CjzK6FHF2ktdPelbO/caahsffBfzLKR51HFcDH05yCLiH0WWXO5nxdVXV4eHjUUZ/+V5Bk9ffem32oHf89QIPAtuH+9sZXYN+ff/vDN+VvxJ4bcU/ITeNjE7F7wIOVNXnVjw0s+tKMj+cmZPkHYy+J3CAUdhvGg5785peX+tNwCM1XKDdTKrq1qraVlULjP7sPFJVH2OG15XkrCQ/+/p94DeBp5jh199ETfsi/mo34IPAPzG6pvnH055njbN/GTgC/ITRtbsdjK5J7gWeA/4eOHc4Nox+ouf7wJPA4rTnP8Ga3s/oGuZ3gSeG2wdneV3ALwOPD2t6CviTYf+7gW8DB4G/Ad4+7D9j2D44PP7uaa9hjDV+AHho1tc1zP6d4fb0602Y5dffJG++U1SSmtjsl1wkSWMy6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1IT/w2ZJDu/GsgFSgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Sanity check...\n", + "plt.hist(user_weights.values())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[('dandelion@google.com', 569.980163717216),\n", + " ('jart@google.com', 524.5859947976339),\n", + " ('gardener@tensorflow.org', 472.63110233817844),\n", + " ('smilkov@google.com', 434.80813797887964),\n", + " ('wchargin@gmail.com', 393.3191133762563),\n", + " ('zeng.chi@gmail.com', 359.13737400821606),\n", + " ('nicholsonc@google.com', 335.99155668200115),\n", + " ('nsthorat@google.com', 233.81765373473374),\n", + " ('nobody@tensorflow.org', 202.53648716913855),\n", + " 
('dsmilkov@gmail.com', 131.64270699045096)]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Accuracy check, given that the operator is familiar with the repo...\n", + "[(k, user_weights[k]) for k in sorted(user_weights, key=user_weights.get, reverse=True)][:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Build a JSON object to send to the frontend.\n", + "results = {\n", + " \"file_to_commits\": data['file_to_commits'],\n", + " \"commits\": {\n", + " c.hexsha: {\n", + " \"author\": c.author.email,\n", + " \"stats\": c.stats.files,\n", + " }\n", + " for c in data['commits'].values()\n", + " },\n", + " \"authors\": sorted(frozenset(c.author.email for c in data['commits'].values())),\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "with open('/tmp/data.json', 'w') as outfile:\n", + " json.dump(results, outfile)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}