diff --git a/experiments/01_commit_graph_dump.ipynb b/experiments/01_commit_graph_dump.ipynb deleted file mode 100644 index 1b1c60a..0000000 --- a/experiments/01_commit_graph_dump.ipynb +++ /dev/null @@ -1,374 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Exploring the commit graph\n", - "\n", - "(2018-02-08)\n", - "\n", - "This script crawls a repository's commit graph by inspecting files\n", - "currently in the repository and looking at all commits that have changed\n", - "that file (tracking over renames). The data is lightly processed and is\n", - "serialized to JSON to be displayed by a frontend." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import collections\n", - "import datetime\n", - "import json\n", - "import math\n", - "import os\n", - "import subprocess\n", - "import tempfile\n", - "\n", - "import git\n", - "\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "class RepoContext:\n", - "\n", - " def __init__(self, repo_root):\n", - " self._repo_root = repo_root\n", - " \n", - " def _tempfile_name(self):\n", - " \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n", - " # Encode the path to the repo in a format that can be used in paths.\n", - " sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n", - " filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n", - " return os.path.join(tempfile.gettempdir(), filename)\n", - "\n", - " def _git_cmd(self, args):\n", - " \"\"\"Run git(1) with the specified arguments; return its stdout.\"\"\"\n", - " return subprocess.check_output(['git', '-C', self._repo_root] + args)\n", - "\n", - " def hashes_for_file(self, current_filename):\n", - " assert current_filename, current_filename\n", - " # Obviously really inefficient to spawn a suprocess for each file\n", - " # in the repo, but the `git` module doesn't seem to have an easy way\n", - " # to `--follow`... if productionized, this could just use libgit2\n", - " # bindings for performance.\n", - " hashes = (\n", - " self._git_cmd(['log', '--pretty=%H', '--follow', current_filename])\n", - " ).splitlines()\n", - " return [h.decode('ascii') for h in hashes]\n", - "\n", - " def all_files(self):\n", - " \"\"\"Get a list of all paths to files in the git repo.\n", - " \n", - " Assumes that filenames are UTF-8--encoded, which seems reasonable.\n", - " Alternately, could return a list of `bytes` objects.\n", - " \"\"\"\n", - " file_bytestring_names = self._git_cmd([\n", - " 'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n", - " ]).split(b'\\0')[:-1] # strip trailing '\\0'\n", - " return [fn.decode('utf-8') for fn in file_bytestring_names]\n", - "\n", - " def load_commit_data(self):\n", - " \"\"\"Load data about the relationships between commits and files.\n", - "\n", - " A commit is _file-reachable_ if it touches a file that currently\n", - " exists in the repository, even if that file might have been renamed\n", - " (but not if it has been deleted).\n", - "\n", - " Returns a dictionary `r` such that\n", - " - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n", - " path to a file currently tracked in the repository, then `d[f]`\n", - " is the list of commit SHAs for all commits that have touched `f`\n", - " (tracking across renames of `f`);\n", - " - `r['files']` is a list of all files currently in the repository\n", - " (and is the keyset of `r['file_to_commits']`;\n", - " - `r['hashes']` is a list of hashes of all file-reachable commits\n", - " (and is the union of the values of `r['file_to_commits']`;\n", - " - `r['commits']` is a dictionary `d` such that if `h` is the hash\n", - " of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n", - " descriptor for the commit with hash `h`; and\n", - " - `r['repo']` is a gitpython `Repo` object for the repository.\n", - " \"\"\"\n", - " print('Starting: %s' % self._repo_root)\n", - " all_files = self.all_files()\n", - " print('Got %s files' % len(all_files))\n", - "\n", - " repo = git.Repo(self._repo_root)\n", - " head = repo.commit().hexsha\n", - "\n", - " # `files[x] = cs` where `cs` is the list of hashes that touched the\n", - " # file now known as `x`\n", - " try:\n", - " print('Loading file database')\n", - " with open(self._tempfile_name()) as infile:\n", - " result = json.load(infile)\n", - " assert result['base'] == self._repo_root, (\n", - " 'Cache for wrong repo: expected %r, got %r' %\n", - " (self._repo_root, result['base']))\n", - " assert result['head'] == head, (\n", - " 'Cache for wrong HEAD commit: expected %r, got %r'\n", - " % (head, result['head']))\n", - " files = result['files']\n", - " print('Loaded file database')\n", - " except (OSError, json.decoder.JSONDecodeError):\n", - " # Build the cache\n", - " print('Compiling file database')\n", - " files = {fn: self.hashes_for_file(fn) for fn in all_files}\n", - " print('Compiled file database')\n", - " cache = {\n", - " 'base': self._repo_root,\n", - " 'head': head,\n", - " 'files': files,\n", - " }\n", - " print('Created cache for file database')\n", - " with open(self._tempfile_name(), 'w') as outfile:\n", - " json.dump(cache, outfile)\n", - " print('Dumped cache for file database')\n", - "\n", - " all_hashes = frozenset().union(*files.values())\n", - " print('Got %s hashes' % len(all_hashes))\n", - "\n", - " # `commits[h] == c` s.t. `c.hash == h`\n", - " commits = {h: repo.commit(h) for h in all_hashes}\n", - " print('Got %s commits' % len(commits))\n", - " \n", - " return {\n", - " 'file_to_commits': files,\n", - " 'hashes': all_hashes,\n", - " 'files': all_files,\n", - " 'commits': commits,\n", - " 'repo': repo,\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting: /home/dandelion/git/tensorboard\n", - "Got 698 files\n", - "Loading file database\n", - "Loaded file database\n", - "Got 1323 hashes\n", - "Got 1323 commits\n" - ] - } - ], - "source": [ - "data = RepoContext('/home/dandelion/git/tensorboard').load_commit_data()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Which commits are important?\n", - "def _commit_weight(commit):\n", - " return math.log1p(commit.stats.total['lines'])\n", - "\n", - "commit_weights = {\n", - " h: _commit_weight(data['commits'][h]) for h in data['commits']\n", - "}\n", - "weight_values = sorted(commit_weights.values())" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 1., 31., 0., 122., 59., 84., 79., 82., 85., 78., 71.,\n", - " 84., 63., 61., 118., 71., 71., 55., 39., 16., 11., 6.,\n", - " 13., 5., 9., 1., 2., 1., 2., 3.]),\n", - " array([ 0. , 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n", - " 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n", - " 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n", - " 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n", - " 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n", - " 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n", - " 10.53582259]),\n", - " )" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADqpJREFUeJzt3X+snmV9x/H3Z1RUMAZYDwRbsoNJp7IlBnNCUBJDqMlQCOUPSSDOdYykWcIUnYsU9wf/zKRmxh/LFpYG0JoRkFUWiDAn6yBkf9Dt8CMKFEIDrByp9BgHOjXDzu/+OHeTQzntOX3u5+E5z9X3K2me+76e637u7522n3P1eu77aqoKSVK7fmvcBUiSRsugl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDVuzbgLAFi7dm1NT0+PuwxJmiiPPPLIT6pqarl+qyLop6enmZ2dHXcZkjRRkvzXSvo5dSNJjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3LJBn+TWJAeSPLGo7a+TPJ3kB0n+Kckpi967IcneJM8k+YNRFS5JWpmVPBn7TeBvgW8tarsfuKGqDib5EnADcH2Sc4Argd8D3gX8a5Lfrar/G27Zq8P01ntX1O+FbZeMuBJJOrJlR/RV9RDw08Pavl9VB7vdh4H13fYm4I6q+t+qeh7YC5w3xHolScdoGHP0fwL8c7e9Dnhx0XtzXZskaUx6BX2SvwQOArcdalqiWx3h2C1JZpPMzs/P9ylDknQUAwd9ks3ApcAnqupQmM8BZy3qth54aanjq2p7Vc1U1czU1LKrbEqSBjRQ0Ce5GLgeuKyqfrnorXuAK5O8NcnZwAbgP/qXKUka1LJ33SS5HbgQWJtkDriRhbts3grcnwTg4ar606p6MsmdwFMsTOlc2+odN5I0KZYN+qq6aonmW47S/4vAF/sUJY3aSm+NBW+P1eTzyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJatyyQZ/k1iQHkjyxqO20JPcnebZ7PbVrT5K/SbI3yQ+SfGCUxUuSlreSEf03gYsPa9sK7KqqDcCubh/go8CG7tcW4KbhlClJGtSyQV9VDwE/Pax5E7Cj294BXL6o/Vu14GHglCRnDqtYSdKxG3SO/oyq2g/QvZ7eta8DXlzUb65rkySNybC/jM0SbbVkx2RLktkks/Pz80MuQ5J0yKBB//KhKZnu9UDXPgectajfeuClpT6gqrZX1UxVzUxNTQ1YhiRpOYMG/T3A5m57M3D3ovY/6u6+OR949dAUjyRpPNYs1yHJ7cCFwNokc8CNwDbgziTXAPuAK7ru9wEfA/YCvwSuHkHNkqRjsGzQV9VVR3hr4xJ9C7i2b1GSpOHxyVhJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxq0ZdwFaHaa33ruifi9su2TElUgaNoN+FTkew/Z4vGbpzebUjSQ1zqCXpMb1Cvokn03yZJInktye5G1Jzk6yO8mzSb6d5MRhFStJOnYDz9EnWQd8Gjinqn6V5E7gSuBjwFer6o4kfw9cA9w0lGp1TFY6/y2pbX2nbtYAb0+yBjgJ2A9cBOzs3t8BXN7zHJKkHgYO+qr6EfBlYB8LAf8q8AjwSlUd7LrNAev6FilJGtzAQZ/kVGATcDbwLuBk4KNLdK0jHL8lyWyS2fn5+UHLkCQto8/UzUeA56tqvqp+DdwFfAg4pZvKAVgPvLTUwVW1vapmqmpmamqqRxmSpKPp88DUPuD8JCcBvwI2ArPAA8DHgTuAzcDdfYvU6/klq6RjMXDQV9XuJDuBR4GDwGPAduBe4I4kf9W13TKMQrU6+CSrNHl6LYFQVTcCNx7W/BxwXp/PlSQNj0/GSlLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWpcryUQpCNpaeG1YV+L6wDpzeaIXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXGuXqmJsNIVJF0ZUnqjXiP6JKck2Znk6SR7knwwyWlJ7k/ybPd66rCKlSQdu75TN18HvldV7wXeD+wBtgK7qmoDsKvblySNycBTN0neCXwY+GOAqnoNeC3JJuDCrtsO4EHg+j5FSivV0n94Ig1LnxH9u4F54BtJHktyc5KTgTOqaj9A93r6EOqUJA2oT9CvAT4A3FRV5wK/4BimaZJsSTKbZHZ+fr5HGZKko+kT9HPAXFXt7vZ3shD8Lyc5E6B7PbDUwVW1vapmqmpmamqqRxmSpKMZeI6+qn6c5MUk76mqZ4CNwFPdr83Atu717qFUOsGcN5Y0Tn3vo/8UcFuSE4HngKtZ+FfCnUmuAfYBV/Q8hySph15BX1WPAzNLvLWxz+dKkobHJRAkqXEGvSQ1zqCXpMa5qJn0JnOBNr3ZHNFLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa530Cc5IcljSb7b7Z+dZHeSZ5N8O8mJ/cuUJA1qGCP664A9i/a/BHy1qjYA/w1cM4RzSJIG1Cvok6wHLgFu7vYDXATs7LrsAC7vcw5JUj99R/RfAz4P/Kbb/23glao62O3PAet6nkOS1MPAQZ/kUuBAVT2yuHmJrnWE47ckmU0yOz8/P2gZkqRl9BnRXwBcluQF4A4Wpmy+BpySZE3XZz3w0lIHV9X2qpqpqpmpqakeZUiSjmbN8l2WVlU3ADcAJLkQ+Iuq+kSSfwQ+zkL4bwbuHkKd0nFneuu9K+r3wrZLRlyJJt0o7qO/HvjzJHtZmLO/ZQTnkCSt0MAj+sWq6kHgwW77OeC8YXyuJKk/n4yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1LihLIEwKVwkStLxyBG9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5KwkDyTZk+TJJNd17acluT/Js93rqcMrV5J0rPqM6A8Cn6uq9wHnA9cmOQfYCuyqqg3Arm5fkjQmAwd9Ve2vqke77Z8De4B1wCZgR9dtB3B53yIlSYMbyhx9kmngXGA3cEZV7YeFHwbA6cM4hyRpML2DPsk7gO8An6mqnx3DcVuSzCaZnZ+f71uGJOkIegV9krewEPK3VdVdXfPLSc7s3j8TOLDUsVW1vapmqmpmamqqTxmSpKPoc9dNgFuAPVX1lUVv3QNs7rY3A3cPXp4kqa81PY69APgk8MMkj3dtXwC2AXcmuQbYB1zRr0RJUh8DB31V/TuQI7y9cdDPlSQNl0/GSlLjDHpJalyfOXpJq8D01ntX1O+FbZeMuBKtVo7oJalxBr0kNc6gl6TGGfSS1Di/jJU0ML8IngyO6CWpcQa9JDXOqRvpOOE0y/HLEb0kNc6gl6TGGfSS1Djn6CWtGn6PMBqO6CWpcQa9JDXOoJekxhn0ktQ4v4yV9Dor/UJUk8OglzRy/vAYL6duJKlxBr0kNc6gl6TGOUcvSSOwmp7yHVnQJ7kY+DpwAnBzVW0b1bkkaSnDDttJ/VJ5JFM3SU4A/g74KHAOcFWSc0ZxLknS0Y1qRH8esLeqngNIcgewCXhq2Cea1J+wkgY37L/3refIqL6MXQe8uGh/rmuTJL3JRjWizxJt9boOyRZgS7f7P0meGfBca4GfDHjskvKlYX7a0Az9Olcpr7Mtx8N19rrGnnnzOyvpNKqgnwPOWrS/HnhpcYeq2g5s73uiJLNVNdP3c1Y7r7MtXmc7JuEaRzV185/AhiRnJzkRuBK4Z0TnkiQdxUhG9FV1MMmfAf/Cwu2Vt1bVk6M4lyTp6EZ2H31V3QfcN6rPX6T39M+E8Drb4nW2Y9VfY6pq+V6SpInlWjeS1LiJDvokFyd5JsneJFvHXc8oJDkryQNJ9iR5Msl1465pVJKckOSxJN8ddy2jkuSUJDuTPN39nn5w3DWNQpLPdn9en0hye5K3jbumYUhya5IDSZ5Y1HZakvuTPNu9njrOGpcysUF/HC2zcBD4XFW9DzgfuLbR6wS4Dtgz7iJG7OvA96rqvcD7afB6k6wDPg3MVNXvs3BDxpXjrWpovglcfFjbVmBXVW0AdnX7q8rEBj2LllmoqteAQ8ssNKWq9lfVo932z1kIhuaeMk6yHrgEuHnctYxKkncCHwZuAaiq16rqlfFWNTJrgLcnWQOcxGHP0UyqqnoI+OlhzZuAHd32DuDyN7WoFZjkoD/ulllIMg2cC+webyUj8TXg88Bvxl3ICL0bmAe+0U1R3Zzk5HEXNWxV9SPgy8A+YD/walV9f7xVjdQZVbUfFgZmwOljrucNJjnol11moSVJ3gF8B/hMVf1s3PUMU5JLgQNV9ci4axmxNcAHgJuq6lzgF6zCf+b31c1RbwLOBt4FnJzkD8db1fFtkoN+2WUWWpHkLSyE/G1Vdde46xmBC4DLkrzAwhTcRUn+YbwljcQcMFdVh/5FtpOF4G/NR4Dnq2q+qn4N3AV8aMw1jdLLSc4E6F4PjLmeN5jkoD8ulllIEhbmdPdU1VfGXc8oVNUNVbW+qqZZ+H38t6pqbgRYVT8GXkzynq5pIyNYunsV2Aecn+Sk7s/vRhr80nmRe4DN3fZm4O4x1rKkif2vBI+jZRYuAD4J/DDJ413bF7onjzV5PgXc1g1OngOuHnM9Q1dVu5PsBB5l4a6xx5iAp0dXIsntwIXA2iRzwI3ANuDOJNew8EPuivFVuDSfjJWkxk3y1I0kaQUMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGvf/9NqA3UwXndMAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Sanity check...\n", - "plt.hist(commit_weights.values(), bins=30)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Which users are important?\n", - "user_weights = collections.defaultdict(lambda: 0.0)\n", - "email_resolutions = {\n", - " 'danmane@gmail.com': 'dandelion@google.com',\n", - "}\n", - "for h in data['commits']:\n", - " nominal_email = data['commits'][h].author.email\n", - " email = email_resolutions.get(nominal_email, nominal_email)\n", - " user_weights[email] += commit_weights[h]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([104., 5., 2., 1., 1., 1., 1., 1., 2., 2.]),\n", - " array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n", - " 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n", - " 456.12276041, 513.05146206, 569.98016372]),\n", - " )" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADeZJREFUeJzt3WuMXHd5x/HvrzHh2ja3TeTGUTeoVguqykWrNDRVRRNahYtIXgQJhIqFLPkNbUNBAqeVivoukSoSkCpUi1BcCXFpSpUooNLIBFV9UdM1CSTBpDZpGty48SKS0IvUkvL0xRyjlVl713N2M56H70canTn/+c+c55HHvz3+75lxqgpJUl8/NesCJElby6CXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqzqCXpOYMeklqbtusCwC45JJLanFxcdZlSNJcOXTo0HeramG9eedE0C8uLrK8vDzrMiRpriT5143Mc+lGkpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpoz6CWpOYNekpo7Jz4ZO8bi3i/M7NiP3/rmmR1bkjbKM3pJas6gl6TmDHpJam7doE/yiSQnkjy8auyiJPclOTJsLxzGk+SjSY4m+UaS125l8ZKk9W3kjP6TwPWnjO0FDlTVTuDAsA/wRmDncNsDfGxzypQkTWvdoK+qvwe+d8rwDcD+4f5+4MZV439ZE/8IXJBk+2YVK0k6e9Ou0V9WVccBhu2lw/jlwHdWzTs2jEmSZmSzfxmbNcZqzYnJniTLSZZXVlY2uQxJ0knTBv1TJ5dkhu2JYfwYcMWqeTuAJ9d6garaV1VLVbW0sLDuf3koSZrStEF/D7BruL8LuHvV+LuGq2+uBp49ucQjSZqNdb8CIcmngdcDlyQ5BnwIuBX4XJLdwBPA24bpXwTeBBwF/ht49xbULEk6C+sGfVW94zQPXbfG3ALeM7YoSdLm8ZOxktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktScQS9JzRn0ktTcqKBP8gdJHknycJJPJ3lRkiuTHExyJMlnk5y/WcVKks7e1EGf5HLg94Glqvpl4Dzg7cBtwO1VtRN4Gti9GYVKkqYzdulmG/DiJNuAlwDHgWuBu4bH9wM3jjyGJGmEqYO+qv4N+FPgCSYB/yxwCHimqp4bph0DLh9bpCRpemOWbi4EbgCuBH4OeCnwxjWm1mmevyfJcpLllZWVacuQJK1jzNLNG4B/qaqVqvoB8Hng14ALhqUcgB3Ak2s9uar2VdVSVS0tLCyMKEOSdCZjgv4J4OokL0kS4Drgm8D9wE3DnF3A3eNKlCSNMWaN/iCTX7p+DXhoeK19wAeB9yU5ClwM3LkJdUqSprRt/SmnV1UfAj50yvBjwFVjXleStHn8ZKwkNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNWfQS1JzBr0kNTcq6JNckOSuJN9KcjjJ65JclOS+JEeG7YWbVawk6eyNPaP/CPC3VfVLwKuAw8Be4EBV7QQODPuSpBmZOuiT/AzwG8CdAFX1v1X1DHADsH+Yth+4cWyRkqTpjTmjfzmwAvxFkgeSfDzJS4HLquo4wLC9dK0nJ9mTZDnJ8srKyogyJElnMibotwGvBT5WVa8B/ouzWKapqn1VtVRVSwsLCyPKkCSdyZigPwYcq6qDw/5dTIL/qSTbAYbtiXElSpLGmDroq+rfge8k+cVh6Drgm8A9wK5hbBdw96gKJUmjbBv5/N8DPpXkfOAx4N1Mfnh8Lslu4AngbSOPIUkaYVTQV9WDwNIaD1035nUlSZvHT8ZKUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Z9BLUnMGvSQ1Nzrok5yX5IEk9w77VyY5mORIks8mOX98mZKkaW3GGf3NwOFV+7cBt1fVTuBpYPcmHEOSNKVRQZ9kB/Bm4OPDfoBrgbuGKfuBG8ccQ5I0ztgz+juADwA/HPYvBp6pqueG/WPA5SOPIUkaYeqgT/IW4ERVHVo9vMbUOs3z9yRZTrK8srIybRmSpHWMOaO/BnhrkseBzzBZsrkDuCDJtmHODuDJtZ5cVfuqaqmqlhYWFkaUIUk6k6mDvqpuqaodVbUIvB34clW9E7gfuGmYtgu4e3SVkqSpbcV19B8E3pfkKJM1+zu34BiSpA3atv6U9VXVV4CvDPcfA67ajNeVJI3nJ2MlqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqTmDXpKaM+glqbmpgz7JFUnuT3I4ySNJbh7GL0pyX5Ijw/bCzStXknS2xpzRPwe8v6peAVwNvCfJK4G9wIGq2gkcGPYlSTMyddBX1fGq+tpw/z+Aw8DlwA3A/mHafuDGsUVKkqa3KWv0SRaB1wAHgcuq6jhMfhgAl27GMSRJ0xkd9EleBvw18N6q+v5ZPG9PkuUkyysrK2PLkCSdxqigT/ICJiH/qar6/DD8VJLtw+PbgRNrPbeq9lXVUlUtLSwsjClDknQGY666CXAncLiqPrzqoXuAXcP9XcDd05cnSRpr24jnXgP8DvBQkgeHsT8EbgU+l2Q38ATwtnElSpLGmDroq+ofgJzm4eumfV1J0ubyk7GS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1JxBL0nNGfSS1Ny2WRcwzxb3fmEmx3381jfP5LiS5pNn9JLUnEEvSc0Z9JLUnEEvSc0Z9JLU3JZcdZPkeuAjwHnAx6vq1q04zk+qWV3tA17xo601y/f2rDwff6c2/Yw+yXnAnwFvBF4JvCPJKzf7OJKkjdmKM/qrgKNV9RhAks8ANwDf3IJj6Xn2k3jGJc27rVijvxz4zqr9Y8OYJGkGtuKMPmuM1Y9NSvYAe4bd/0zy6JTHuwT47pTPPVd17Al69tWxJ+jZ1znZU24b9fSf38ikrQj6Y8AVq/Z3AE+eOqmq9gH7xh4syXJVLY19nXNJx56gZ18de4KefXXsaaO2Yunmn4CdSa5Mcj7wduCeLTiOJGkDNv2MvqqeS/K7wJeYXF75iap6ZLOPI0namC25jr6qvgh8cSteew2jl3/OQR17gp59dewJevbVsacNSdWP/Z5UktSIX4EgSc3NbdAnuT7Jo0mOJtk763rORpJPJDmR5OFVYxcluS/JkWF74TCeJB8d+vxGktfOrvLTS3JFkvuTHE7ySJKbh/F57+tFSb6a5OtDX38yjF+Z5ODQ12eHCw9I8sJh/+jw+OIs6z+TJOcleSDJvcN+h54eT/JQkgeTLA9jc/0e3AxzGfQNvmbhk8D1p4ztBQ5U1U7gwLAPkx53Drc9wMeepxrP1nPA+6vqFcDVwHuGP5N57+t/gGur6lXAq4Hrk1wN3AbcPvT1NLB7mL8beLqqfgG4fZh3rroZOLxqv0NPAL9ZVa9edSnlvL8Hx6uqubsBrwO+tGr/FuCWWdd1lj0sAg+v2n8U2D7c3w48Otz/c+Ada807l2/A3cBvdeoLeAnwNeBXmXzwZtsw/qP3I5OrzV433N82zMusa1+jlx1MQu9a4F4mH3Sc656G+h4HLjllrM17cNrbXJ7R0/NrFi6rquMAw/bSYXzueh3+af8a4CAN+hqWOB4ETgD3Ad8Gnqmq54Ypq2v/UV/D488CFz+/FW/IHcAHgB8O+xcz/z3B5FP4f5fk0PDpe2jwHhxrXv9z8A19zUITc9VrkpcBfw28t6q+n6xV/mTqGmPnZF9V9X/Aq5NcAPwN8Iq1pg3bc76vJG8BTlTVoSSvPzm8xtS56WmVa6rqySSXAvcl+dYZ5s5TX6PM6xn9hr5mYc48lWQ7wLA9MYzPTa9JXsAk5D9VVZ8fhue+r5Oq6hngK0x+B3FBkpMnSqtr/1Ffw+M/C3zv+a10XdcAb03yOPAZJss3dzDfPQFQVU8O2xNMfihfRaP34LTmNeg7fs3CPcCu4f4uJmvcJ8ffNVwhcDXw7Ml/hp5LMjl1vxM4XFUfXvXQvPe1MJzJk+TFwBuY/ALzfuCmYdqpfZ3s9ybgyzUsAJ8rquqWqtpRVYtM/u58uareyRz3BJDkpUl++uR94LeBh5nz9+CmmPUvCaa9AW8C/pnJeukfzbqes6z908Bx4AdMzip2M1nzPAAcGbYXDXPD5AqjbwMPAUuzrv80Pf06k3/2fgN4cLi9qUFfvwI8MPT1MPDHw/jLga8CR4G/Al44jL9o2D86PP7yWfewTn+vB+7t0NNQ/9eH2yMnc2He34ObcfOTsZLU3Lwu3UiSNsigl6TmDHpJas6gl6TmDHpJas6gl6TmDHpJas6gl6Tm/h8gyLdRgbyIwAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Sanity check...\n", - "plt.hist(user_weights.values())" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[('dandelion@google.com', 569.9801637172158),\n", - " ('jart@google.com', 545.0875820097374),\n", - " ('zeng.chi@gmail.com', 474.4941786809108),\n", - " ('gardener@tensorflow.org', 472.63110233817844),\n", - " ('smilkov@google.com', 434.80813797887964),\n", - " ('wchargin@gmail.com', 393.3191133762563),\n", - " ('nicholsonc@google.com', 335.9915566820012),\n", - " ('nsthorat@google.com', 233.81765373473377),\n", - " ('nobody@tensorflow.org', 202.53648716913858),\n", - " ('dsmilkov@gmail.com', 131.64270699045093)]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Accuracy check, given that the operator is familiar with the repo...\n", - "[(k, user_weights[k]) for k in sorted(user_weights, key=user_weights.get, reverse=True)][:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Build a JSON object to send to the frontend.\n", - "results = {\n", - " \"fileToCommits\": data['file_to_commits'],\n", - " \"commits\": {\n", - " c.hexsha: {\n", - " \"author\": c.author.email,\n", - " \"stats\": c.stats.files,\n", - " }\n", - " for c in data['commits'].values()\n", - " },\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "with open('/tmp/data.json', 'w') as outfile:\n", - " json.dump(results, outfile)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}