Check in experimental commit graph traversal (#8)
Summary: This is a minimal script that will crawl the file-reachable commits in a repository and dump statistics to a JSON file that can be further analyzed interactively on a frontend. Requires PyPI `gitpython` (and `matplotlib`, but this can be excised). Requires `jupyter` to run. (Mostly paired with @dandelionmane on 2018-02-08.) wchargin-branch: experiment-commit-traversal
This commit is contained in:
parent
84ad9d57bf
commit
03198b0c86
|
@ -0,0 +1,368 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Exploring the commit graph\n",
|
||||||
|
"\n",
|
||||||
|
"(2018-02-08)\n",
|
||||||
|
"\n",
|
||||||
|
"This script crawls a repository's commit graph by inspecting each file\n",
|
||||||
|
"currently in the repository and looking at all commits that have changed\n",
|
||||||
|
"that file (tracking over renames). The data is lightly processed and is\n",
|
||||||
|
"serialized to JSON to be displayed by a frontend."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import collections\n",
|
||||||
|
"import datetime\n",
|
||||||
|
"import json\n",
|
||||||
|
"import math\n",
|
||||||
|
"import os\n",
|
||||||
|
"import subprocess\n",
|
||||||
|
"import tempfile\n",
|
||||||
|
"\n",
|
||||||
|
"import git\n",
|
||||||
|
"\n",
|
||||||
|
"import matplotlib.pyplot as plt"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class RepoContext:\n",
|
||||||
|
"\n",
|
||||||
|
" def __init__(self, repo_root):\n",
|
||||||
|
" self._repo_root = repo_root\n",
|
||||||
|
" \n",
|
||||||
|
" def _tempfile_name(self):\n",
|
||||||
|
" \"\"\"Get a persistent tempfile name keyed against this repo.\"\"\"\n",
|
||||||
|
" # Encode the path to the repo in a format that can be used in paths.\n",
|
||||||
|
" sanitized_repo_name = self._repo_root.replace(os.sep, '%')\n",
|
||||||
|
" filename = 'file_to_hashes_%s.json' % sanitized_repo_name\n",
|
||||||
|
" return os.path.join(tempfile.gettempdir(), filename)\n",
|
||||||
|
"\n",
|
||||||
|
" def _git_cmd(self, args):\n",
|
||||||
|
" \"\"\"Run git(1) with the specified arguments; return its stdout.\"\"\"\n",
|
||||||
|
" return subprocess.check_output(['git', '-C', self._repo_root] + args)\n",
|
||||||
|
"\n",
|
||||||
|
" def hashes_for_file(self, current_filename):\n",
|
||||||
|
" assert current_filename, current_filename\n",
|
||||||
|
" # Obviously really inefficient to spawn a subprocess for each file\n",
|
||||||
|
" # in the repo, but the `git` module doesn't seem to have an easy way\n",
|
||||||
|
" # to `--follow`... if productionized, this could just use libgit2\n",
|
||||||
|
" # bindings for performance.\n",
|
||||||
|
" hashes = (\n",
|
||||||
|
" self._git_cmd(['log', '--pretty=%H', '--follow', current_filename])\n",
|
||||||
|
" ).splitlines()\n",
|
||||||
|
" return [h.decode('ascii') for h in hashes]\n",
|
||||||
|
"\n",
|
||||||
|
" def all_files(self):\n",
|
||||||
|
" \"\"\"Get a list of all paths to files in the git repo.\n",
|
||||||
|
" \n",
|
||||||
|
" Assumes that filenames are UTF-8--encoded, which seems reasonable.\n",
|
||||||
|
" Alternately, could return a list of `bytes` objects.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" file_bytestring_names = self._git_cmd([\n",
|
||||||
|
" 'ls-tree', '-r', '--full-tree', '--name-only', '-z', 'HEAD'\n",
|
||||||
|
" ]).split(b'\\0')[:-1] # strip trailing '\\0'\n",
|
||||||
|
" return [fn.decode('utf-8') for fn in file_bytestring_names]\n",
|
||||||
|
"\n",
|
||||||
|
" def load_commit_data(self):\n",
|
||||||
|
" \"\"\"Load data about the relationships between commits and files.\n",
|
||||||
|
"\n",
|
||||||
|
" A commit is _file-reachable_ if it touches a file that currently\n",
|
||||||
|
" exists in the repository, even if that file might have been renamed\n",
|
||||||
|
" (but not if it has been deleted).\n",
|
||||||
|
"\n",
|
||||||
|
" Returns a dictionary `r` such that\n",
|
||||||
|
" - `r['file_to_commits']` is a dictionary `d` such that if `f` is a\n",
|
||||||
|
" path to a file currently tracked in the repository, then `d[f]`\n",
|
||||||
|
" is the list of commit SHAs for all commits that have touched `f`\n",
|
||||||
|
" (tracking across renames of `f`);\n",
|
||||||
|
" - `r['files']` is a list of all files currently in the repository\n",
|
||||||
|
" (and is the keyset of `r['file_to_commits']`);\n",
|
||||||
|
" - `r['hashes']` is a list of hashes of all file-reachable commits\n",
|
||||||
|
" (and is the union of the values of `r['file_to_commits']`);\n",
|
||||||
|
" - `r['commits']` is a dictionary `d` such that if `h` is the hash\n",
|
||||||
|
" of a file-reachable commit, then `d[h]` is a gitpython `Commit`\n",
|
||||||
|
" descriptor for the commit with hash `h`; and\n",
|
||||||
|
" - `r['repo']` is a gitpython `Repo` object for the repository.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" print('Starting: %s' % self._repo_root)\n",
|
||||||
|
" all_files = self.all_files()\n",
|
||||||
|
" print('Got %s files' % len(all_files))\n",
|
||||||
|
"\n",
|
||||||
|
" repo = git.Repo(self._repo_root)\n",
|
||||||
|
" head = repo.commit().hexsha\n",
|
||||||
|
"\n",
|
||||||
|
" # `files[x] = cs` where `cs` is the list of hashes that touched the\n",
|
||||||
|
" # file now known as `x`\n",
|
||||||
|
" try:\n",
|
||||||
|
" print('Loading file database')\n",
|
||||||
|
" with open(self._tempfile_name()) as infile:\n",
|
||||||
|
" result = json.load(infile)\n",
|
||||||
|
" assert result['base'] == self._repo_root, (\n",
|
||||||
|
" 'Cache for wrong repo: expected %r, got %r' %\n",
|
||||||
|
" (self._repo_root, result['base']))\n",
|
||||||
|
" assert result['head'] == head, (\n",
|
||||||
|
" 'Cache for wrong HEAD commit: expected %r, got %r'\n",
|
||||||
|
" % (head, result['head']))\n",
|
||||||
|
" files = result['files']\n",
|
||||||
|
" print('Loaded file database')\n",
|
||||||
|
" except (OSError, json.decoder.JSONDecodeError):\n",
|
||||||
|
" # Build the cache\n",
|
||||||
|
" print('Compiling file database')\n",
|
||||||
|
" files = {fn: self.hashes_for_file(fn) for fn in all_files}\n",
|
||||||
|
" print('Compiled file database')\n",
|
||||||
|
" cache = {\n",
|
||||||
|
" 'base': self._repo_root,\n",
|
||||||
|
" 'head': head,\n",
|
||||||
|
" 'files': files,\n",
|
||||||
|
" }\n",
|
||||||
|
" print('Created cache for file database')\n",
|
||||||
|
" with open(self._tempfile_name(), 'w') as outfile:\n",
|
||||||
|
" json.dump(cache, outfile)\n",
|
||||||
|
" print('Dumped cache for file database')\n",
|
||||||
|
"\n",
|
||||||
|
" all_hashes = frozenset().union(*files.values())\n",
|
||||||
|
" print('Got %s hashes' % len(all_hashes))\n",
|
||||||
|
"\n",
|
||||||
|
" # `commits[h] == c` s.t. `c.hexsha == h`\n",
|
||||||
|
" commits = {h: repo.commit(h) for h in all_hashes}\n",
|
||||||
|
" print('Got %s commits' % len(commits))\n",
|
||||||
|
" \n",
|
||||||
|
" return {\n",
|
||||||
|
" 'file_to_commits': files,\n",
|
||||||
|
" 'hashes': all_hashes,\n",
|
||||||
|
" 'files': all_files,\n",
|
||||||
|
" 'commits': commits,\n",
|
||||||
|
" 'repo': repo,\n",
|
||||||
|
" }"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Starting: /home/wchargin/git/tensorboard\n",
|
||||||
|
"Got 636 files\n",
|
||||||
|
"Loading file database\n",
|
||||||
|
"Loaded file database\n",
|
||||||
|
"Got 1223 hashes\n",
|
||||||
|
"Got 1223 commits\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"data = RepoContext('/home/wchargin/git/tensorboard').load_commit_data()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Which commits are important?\n",
|
||||||
|
"def _commit_weight(commit):\n",
|
||||||
|
" return math.log1p(commit.stats.total['lines'])\n",
|
||||||
|
"\n",
|
||||||
|
"commit_weights = {\n",
|
||||||
|
" h: _commit_weight(data['commits'][h]) for h in data['commits']\n",
|
||||||
|
"}\n",
|
||||||
|
"weight_values = sorted(commit_weights.values())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"(array([ 1., 30., 0., 112., 55., 79., 73., 76., 78., 68., 63.,\n",
|
||||||
|
" 77., 55., 58., 107., 68., 70., 50., 38., 15., 9., 6.,\n",
|
||||||
|
" 13., 5., 8., 1., 2., 1., 2., 3.]),\n",
|
||||||
|
" array([ 0. , 0.35119409, 0.70238817, 1.05358226, 1.40477635,\n",
|
||||||
|
" 1.75597043, 2.10716452, 2.45835861, 2.80955269, 3.16074678,\n",
|
||||||
|
" 3.51194086, 3.86313495, 4.21432904, 4.56552312, 4.91671721,\n",
|
||||||
|
" 5.2679113 , 5.61910538, 5.97029947, 6.32149356, 6.67268764,\n",
|
||||||
|
" 7.02388173, 7.37507582, 7.7262699 , 8.07746399, 8.42865807,\n",
|
||||||
|
" 8.77985216, 9.13104625, 9.48224033, 9.83343442, 10.18462851,\n",
|
||||||
|
" 10.53582259]),\n",
|
||||||
|
" <a list of 30 Patch objects>)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADZ5JREFUeJzt3V2oXeWdx/Hvb0yl1ULV5hDSROYEGlqkUJSD2AmUYnphJ6XJRRHLTCdIIDdOa1+gTXvjbYTS1oFBCGqbYUQrqRCp0hlJLWUuJvREZXxJi8FGTSYxp7TatwEr/c/FWcJpPMk5Z6+93Wc/5/uBsNd69nr5r7z8zrOfvZ6VVBWSpHb9zbgLkCSNlkEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJaty6cRcAsH79+pqenh53GZI0UY4dO/brqppaartVEfTT09PMzs6OuwxJmihJXlrOdg7dSFLjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS41bFzNhJNb3v0WVtd3L/jhFXIkkXZo9ekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DgnTGlNcrKb1hJ79JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGLRn0Se5Lci7JswvarkryeJIXutcru/Yk+ZckJ5L8T5LrRlm8JGlpy+nRfx+46by2fcCRqtoKHOnWAT4FbO1+7QXuHk6ZkqRBLRn0VfUz4DfnNe8EDnbLB4FdC9r/reb9N3BFko3DKlaStHKDjtFvqKoz3fJZYEO3vAl4ZcF2p7o2SdKY9P4ytqoKqJXul2Rvktkks3Nzc33LkCRdwKBB/+pbQzLd67mu/TRw9YLtNndtb1NVB6pqpqpmpqamBixDkrSUQYP+EWB3t7wbOLyg/Z+6u29uAF5fMMQjSRqDJZ9Hn+QB4BPA+iSngDuA/cBDSfYALwE3d5s/Bvw9cAL4E3DrCGqWJK3AkkFfVZ+7wFvbF9m2gNv6FiVJGh5nxkpS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxvYI+yZeTPJfk2SQPJHl3ki1JjiY5keQHSS4dVrGSpJUbOOiTbAK+CMxU1UeAS4BbgDuB71TVB4HfAnuGUagkaTB9h27WAe9Jsg64DDgD3Agc6t4/COzqeQ5JUg8DB31VnQa+BbzMfMC/DhwDXquqN7vNTgGb+hYpSRpcn6GbK4GdwBbgA8DlwE0r2H9vktkks3Nzc4OWIUlaQp+hm08Cv6qquar6M/AwsA24ohvKAdgMnF5s56o6UFUzVTUzNTXVowxJ0sWsW3qTC3oZuCHJZcD/AduBWeAJ4LPAg8Bu4HDfIteK6X2PLmu7k/t3jLgSSS3pM0Z/lPkvXZ8EnumOdQD4OvCVJCeA9wP3DqFOSdKA+vToqao7gDvOa34RuL7PcfXOWu4nCfDThDSJegW91JfDVdLo+QgESWqcPfqGrWRIRlK77NFLUuMMeklqnEEvSY0z6CWpcQa9JDXOu24mkHfTSFoJe/SS1DiDXpIaZ9BLUuMMeklqnF/GSkPiA9q0Wtmjl6TGGfSS1DiDXpIa5xi9VmStjUM7OU0tsEcvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapwTpjQSTjSSVg979JLUuF5Bn+SKJIeS/CLJ8SQfS3JVkseTvNC9XjmsYiVJK9e3R38X8OOq+jDwUeA4sA84UlVbgSPduiRpTAYO+iTvAz4O3AtQVW9U1WvATuBgt9lBYFffIiVJg+vTo98CzAHfS/
JUknuSXA5sqKoz3TZngQ19i5QkDa5P0K8DrgPurqprgT9y3jBNVRVQi+2cZG+S2SSzc3NzPcqQJF1Mn6A/BZyqqqPd+iHmg//VJBsButdzi+1cVQeqaqaqZqampnqUIUm6mIGDvqrOAq8k+VDXtB14HngE2N217QYO96pQktRL3wlTXwDuT3Ip8CJwK/M/PB5Ksgd4Cbi55zkkST30CvqqehqYWeSt7X2OKw3KGbnS2zkzVpIaZ9BLUuMMeklqnEEvSY3zMcXvAL8g7M/fQ2lw9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjXPClPQOW+7kr5P7d4y4Eq0V9uglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuN6B32SS5I8leRH3fqWJEeTnEjygySX9i9TkjSoYfTobweOL1i/E/hOVX0Q+C2wZwjnkCQNqFfQJ9kM7ADu6dYD3Agc6jY5COzqcw5JUj99e/TfBb4G/KVbfz/wWlW92a2fAjb1PIckqYeBgz7Jp4FzVXVswP33JplNMjs3NzdoGZKkJfTp0W8DPpPkJPAg80M2dwFXJHnr/6LdDJxebOeqOlBVM1U1MzU11aMMSdLFDBz0VfWNqtpcVdPALcBPquofgCeAz3ab7QYO965SkjSwUdxH/3XgK0lOMD9mf+8IziFJWqZ1S2+ytKr6KfDTbvlF4PphHFday6b3Pbqs7U7u3zHiSjTpnBkrSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuKE8vXISLPdJgODTACW1xR69JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcwEGf5OokTyR5PslzSW7v2q9K8niSF7rXK4dXriRppfr06N8EvlpV1wA3ALcluQbYBxypqq3AkW5dkjQmAwd9VZ2pqie75d8Dx4FNwE7gYLfZQWBX3yIlSYMbyhh9kmngWuAosKGqznRvnQU2DOMckqTB9A76JO8Ffgh8qap+t/C9qiqgLrDf3iSzSWbn5ub6liFJuoBeQZ/kXcyH/P1V9XDX/GqSjd37G4Fzi+1bVQeqaqaqZqampvqUIUm6iD533QS4FzheVd9e8NYjwO5ueTdwePDyJEl9reux7zbg88AzSZ7u2r4J7AceSrIHeAm4uV+JkqQ+Bg76qvovIBd4e/ugx5UkDZczYyWpcQa9JDXOoJekxhn0ktS4PnfdSFoFpvc9uqztTu7fMeJKtFrZo5ekxhn0ktQ4g16SGucYvaSBLPe7AfD7gXGzRy9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIa5+2V0hrhoxLWLnv0ktQ4e/SSVg0/dYyGPXpJapxBL0mNM+glqXGO0Uv6Kyt5WJkmgz16SWqcPXpJI+enhPGyRy9JjTPoJalxDt1I0gispslf9uglqXEj6dEnuQm4C7gEuKeq9o/iPJJ0McPuVU/ql8pD79EnuQT4V+BTwDXA55JcM+zzSJKWZxQ9+uuBE1X1IkCSB4GdwPMjONfE/oSVNLhh/7tvPUdGMUa/CXhlwfqprk2SNAZju+smyV5gb7f6hyS/HPBQ64FfD6eqeblzmEcbmqFf5yrldbZjLVwj9LzOnnnzt8vZaBRBfxq4esH65q7tr1TVAeBA35Mlma2qmb7HWe28zrashetcC9cIk3Gdoxi6+TmwNcmWJJcCtwCPjOA8kqRlGHqPvqreTPLPwH8wf3vlfVX13LDPI0lanpGM0VfVY8Bjozj2InoP/0wIr7Mta+E618I1wgRcZ6pq3DVIkkbIRyBIUuMmOuiT3JTkl0lOJNk37npGIcnVSZ5I8nyS55LcPu6aRiXJJUmeSvKjcdcyKkmuSHIoyS
+SHE/ysXHXNApJvtz9fX02yQNJ3j3umoYhyX1JziV5dkHbVUkeT/JC93rlOGtczMQG/Rp61MKbwFer6hrgBuC2Rq8T4Hbg+LiLGLG7gB9X1YeBj9Lg9SbZBHwRmKmqjzB/U8Yt461qaL4P3HRe2z7gSFVtBY5066vKxAY9Cx61UFVvAG89aqEpVXWmqp7sln/PfDA0N9M4yWZgB3DPuGsZlSTvAz4O3AtQVW9U1WvjrWpk1gHvSbIOuAz43zHXMxRV9TPgN+c17wQOdssHgV3vaFHLMMlBv+YetZBkGrgWODreSkbiu8DXgL+Mu5AR2gLMAd/rhqjuSXL5uIsatqo6DXwLeBk4A7xeVf853qpGakNVnemWzwIbxlnMYiY56NeUJO8Ffgh8qap+N+56hinJp4FzVXVs3LWM2DrgOuDuqroW+COr8GN+X90Y9U7mf7B9ALg8yT+Ot6p3Rs3fxrjqbmWc5KBf1qMWWpDkXcyH/P1V9fC46xmBbcBnkpxkfgjuxiT/Pt6SRuIUcKqq3vpEdoj54G/NJ4FfVdVcVf0ZeBj4uzHXNEqvJtkI0L2eG3M9bzPJQb8mHrWQJMyP6R6vqm+Pu55RqKpvVNXmqppm/s/xJ1XVXA+wqs4CryT5UNe0nRE9vnvMXgZuSHJZ9/d3Ow1+6bzAI8Dubnk3cHiMtSxqYv/P2DX0qIVtwOeBZ5I83bV9s5t9rMnzBeD+rnPyInDrmOsZuqo6muQQ8CTzd409xQTMHl2OJA8AnwDWJzkF3AHsBx5Ksgd4Cbh5fBUuzpmxktS4SR66kSQtg0EvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1Lj/h+tOhhCt7WwHAAAAABJRU5ErkJggg==\n",
|
||||||
|
"text/plain": [
|
||||||
|
"<matplotlib.figure.Figure at 0x7f2089a264a8>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Sanity check...\n",
|
||||||
|
"plt.hist(commit_weights.values(), bins=30)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Which users are important?\n",
|
||||||
|
"user_weights = collections.defaultdict(lambda: 0.0)\n",
|
||||||
|
"email_resolutions = {\n",
|
||||||
|
" 'danmane@gmail.com': 'dandelion@google.com',\n",
|
||||||
|
"}\n",
|
||||||
|
"for h in data['commits']:\n",
|
||||||
|
" nominal_email = data['commits'][h].author.email\n",
|
||||||
|
" email = email_resolutions.get(nominal_email, nominal_email)\n",
|
||||||
|
" user_weights[email] += commit_weights[h]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"(array([95., 4., 1., 1., 1., 1., 2., 1., 1., 2.]),\n",
|
||||||
|
" array([ 0.69314718, 57.62184883, 114.55055049, 171.47925214,\n",
|
||||||
|
" 228.4079538 , 285.33665545, 342.2653571 , 399.19405876,\n",
|
||||||
|
" 456.12276041, 513.05146206, 569.98016372]),\n",
|
||||||
|
" <a list of 10 Patch objects>)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADMlJREFUeJzt3F+MpfVdx/H3R6aUQk35N25Wljg0EBtiLDQThNCYCmqwNIUL0tA0ujGb7E1Vapu0iyY23kFiSjExjZtS3QtSqBQDoU0rLvTCm62zQMufLbLFpWWzsFMDVL3Qrv16cR5wxF3mzM6ZPXO+vl/JyZznOc/M+f7g7HuffWbOpKqQJM2+n5n2AJKkyTDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKamDuVT3b++efXwsLCqXxKSZp5+/fv/1FVza923CkN+sLCAktLS6fyKSVp5iV5YZzjvOQiSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTZzSd4qux8Kur03leQ/ddv1UnleS1sozdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1MRYQU/yh0meTvJUki8nOSPJRUn2JTmY5N4kp2/0sJKkE1s16EkuAP4AWKyqXwJOA24GbgfuqKqLgVeAHRs5qCTprY17yWUOeEeSOeBM4AhwDXDf8Pge4MbJjydJGteqQa+qw8CfAT9gFPLXgP3Aq1V1bDjsReCC431+kp1JlpIsLS8vT2ZqSdL/Mc4ll3OAG4CLgJ8HzgKuG/cJqmp3VS1W1eL8/PxJDypJemvjXHL5deCfq2q5qn4C3A9cDZw9XIIB2AYc3qAZJUljGCfoPwCuTHJmkgDXAs8AjwI3DcdsBx7YmBElSeMY5xr6Pkbf/HwMeHL4nN3AZ4BPJjkInAfctYFzSpJWMbf6IVBVnwU++6bdzwNXTHwiSdJJ8Z2iktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6Qmxgp6krOT3Jfke0kOJLkqyblJHk7y3PDxnI0eVpJ0YuOeod8JfKOq3gO8FzgA7AL2VtUlwN5hW5I0JasGPcm7gF8F7gKoqv+sqleBG4A9w2F7gBs3akhJ0urGOUO/CFgG/irJ40m+mOQsYEtVHRmOeQnYslFDSpJWN07Q54D3AV+oqsuBf+dNl1eqqoA63icn2ZlkKcnS8vLyeueVJJ3AOEF/EXixqvYN2/cxCvzLSbYCDB+PHu+Tq2p3VS1W1eL8/PwkZpYkHceqQa+ql4AfJvnFYde1wDPAg8D2Yd924IENmVCSNJa5MY/7feDuJKcDzwO/y+gvg68k2QG8AHxkY0aUJI1jrKBX1RPA4nEeunay40iSTpbvFJWkJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJamJsYOe5LQkjyd5aNi+KMm+JAeT3Jvk9I0bU5K0mrWcod8CHFixfTtwR1VdDLwC7JjkYJKktRkr6Em2AdcDXxy2A1wD3Dccsg
e4cSMGlCSNZ9wz9M8DnwZ+OmyfB7xaVceG7ReBCyY8myRpDVYNepIPAUerav/JPEGSnUmWkiwtLy+fzJeQJI1hnDP0q4EPJzkE3MPoUsudwNlJ5oZjtgGHj/fJVbW7qharanF+fn4CI0uSjmfVoFfVrVW1raoWgJuBR6rqY8CjwE3DYduBBzZsSknSqtbzc+ifAT6Z5CCja+p3TWYkSdLJmFv9kP9RVd8CvjXcfx64YvIjSZJOhu8UlaQmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqYlVg57kwiSPJnkmydNJbhn2n5vk4STPDR/P2fhxJUknMs4Z+jHgU1V1KXAl8PEklwK7gL1VdQmwd9iWJE3JqkGvqiNV9dhw/1+BA8AFwA3AnuGwPcCNGzWkJGl1a7qGnmQBuBzYB2ypqiPDQy8BW07wOTuTLCVZWl5eXseokqS3MnbQk7wT+Crwiar68crHqqqAOt7nVdXuqlqsqsX5+fl1DStJOrGxgp7kbYxifndV3T/sfjnJ1uHxrcDRjRlRkjSOcX7KJcBdwIGq+tyKhx4Etg/3twMPTH48SdK45sY45mrgt4Enkzwx7Psj4DbgK0l2AC8AH9mYESVJ41g16FX1D0BO8PC1kx1HknSyfKeoJDVh0CWpCYMuSU0YdElqwqBLUhMGXZKaMOiS1IRBl6QmDLokNWHQJakJgy5JTRh0SWrCoEtSEwZdkpow6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1ITBl2SmjDoktSEQZekJgy6JDVh0CWpCYMuSU0YdElqwqBLUhNz0x5gs1vY9bWpPfeh266f2nNLmj2eoUtSEwZdkpow6JLUhNfQpSmb1vdp/j9+j6b7f+t1naEnuS7Js0kOJtk1qaEkSWt30kFPchrwF8BvAZcCH01y6aQGkyStzXouuVwBHKyq5wGS3APcADwzicE03R+ZVH++vvpZzyWXC4Afrth+cdgnSZqCDf+maJKdwM5h89+SPHuSX+p84EeTmWrT6Lgm6LmujmuCnuvadGvK7ev+Er8wzkHrCfph4MIV29uGff9LVe0Gdq/jeQBIslRVi+v9OptJxzVBz3V1XBP0XFfHNY1rPZdc/hG4JMlFSU4HbgYenMxYkqS1Oukz9Ko6luT3gG8CpwFfqqqnJzaZJGlN1nUNvaq+Dnx9QrOsZt2XbTahjmuCnuvquCboua6OaxpLqmraM0iSJsDf5SJJTWz6oM/yrxdI8qUkR5M8tWLfuUkeTvLc8PGcYX+S/Pmwzu8med/0Jj+xJBcmeTTJM0meTnLLsH9m15XkjCTfTvKdYU1/Ouy/KMm+YfZ7h2/+k+Ttw/bB4fGFac6/miSnJXk8yUPD9kyvK8mhJE8meSLJ0rBvZl9/k7Spg97g1wv8NXDdm/btAvZW1SXA3mEbRmu8ZLjtBL5wimZcq2PAp6rqUuBK4OPD/5NZXtd/ANdU1XuBy4DrklwJ3A7cUVUXA68AO4bjdwCvDPvvGI7bzG4BDqzY7rCuX6uqy1b8eOIsv/4mp6o27Q24Cvjmiu1bgVunPdca17AAPLVi+1lg63B/K/DscP8vgY8e77jNfAMeAH6jy7qAM4HHgF9h9OaUuWH/G69FRj/ZddVwf244LtOe/QTr2cYocNcADwGZ9XUBh4Dz37SvxetvvbdNfYZOz18vsKWqjgz3XwK2DPdnbq3DP8kvB/Yx4+saLks8ARwFHga+D7xaVceGQ1bO/caahsdfA847tROP7fPAp4GfDtvnMfvrKuDvkuwf3okOM/76mxR/H/oUVVUlmckfM0ryTuCrwCeq6sdJ3nhsFtdVVf
8FXJbkbOBvgfdMeaR1S/Ih4GhV7U/ygWnPM0Hvr6rDSX4OeDjJ91Y+OIuvv0nZ7GfoY/16gRnzcpKtAMPHo8P+mVlrkrcxivndVXX/sHvm1wVQVa8CjzK6FHF2ktdPelbO/caahsffBfzLKR51HFcDH05yCLiH0WWXO5nxdVXV4eHjUUZ/+V5Bk9ffem32oHf89QIPAtuH+9sZXYN+ff/vDN+VvxJ4bcU/ITeNjE7F7wIOVNXnVjw0s+tKMj+cmZPkHYy+J3CAUdhvGg5785peX+tNwCM1XKDdTKrq1qraVlULjP7sPFJVH2OG15XkrCQ/+/p94DeBp5jh199ETfsi/mo34IPAPzG6pvnH055njbN/GTgC/ITRtbsdjK5J7gWeA/4eOHc4Nox+ouf7wJPA4rTnP8Ga3s/oGuZ3gSeG2wdneV3ALwOPD2t6CviTYf+7gW8DB4G/Ad4+7D9j2D44PP7uaa9hjDV+AHho1tc1zP6d4fb0602Y5dffJG++U1SSmtjsl1wkSWMy6JLUhEGXpCYMuiQ1YdAlqQmDLklNGHRJasKgS1IT/w2ZJDu/GsgFSgAAAABJRU5ErkJggg==\n",
|
||||||
|
"text/plain": [
|
||||||
|
"<matplotlib.figure.Figure at 0x7f2087770240>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Sanity check...\n",
|
||||||
|
"plt.hist(user_weights.values())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[('dandelion@google.com', 569.980163717216),\n",
|
||||||
|
" ('jart@google.com', 524.5859947976339),\n",
|
||||||
|
" ('gardener@tensorflow.org', 472.63110233817844),\n",
|
||||||
|
" ('smilkov@google.com', 434.80813797887964),\n",
|
||||||
|
" ('wchargin@gmail.com', 393.3191133762563),\n",
|
||||||
|
" ('zeng.chi@gmail.com', 359.13737400821606),\n",
|
||||||
|
" ('nicholsonc@google.com', 335.99155668200115),\n",
|
||||||
|
" ('nsthorat@google.com', 233.81765373473374),\n",
|
||||||
|
" ('nobody@tensorflow.org', 202.53648716913855),\n",
|
||||||
|
" ('dsmilkov@gmail.com', 131.64270699045096)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Accuracy check, given that the operator is familiar with the repo...\n",
|
||||||
|
"[(k, user_weights[k]) for k in sorted(user_weights, key=user_weights.get, reverse=True)][:10]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Build a JSON object to send to the frontend.\n",
|
||||||
|
"results = {\n",
|
||||||
|
" \"file_to_commits\": data['file_to_commits'],\n",
|
||||||
|
" \"commits\": {\n",
|
||||||
|
" c.hexsha: {\n",
|
||||||
|
" \"author\": c.author.email,\n",
|
||||||
|
" \"stats\": c.stats.files,\n",
|
||||||
|
" }\n",
|
||||||
|
" for c in data['commits'].values()\n",
|
||||||
|
" },\n",
|
||||||
|
" \"authors\": sorted(frozenset(c.author.email for c in data['commits'].values())),\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"with open('/tmp/data.json', 'w') as outfile:\n",
|
||||||
|
" json.dump(results, outfile)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.5.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
Loading…
Reference in New Issue