Add preliminary mailmap and tool to generate it

This commit is contained in:
Stefan van der Walt
2015-11-02 16:19:20 -08:00
parent d187840f19
commit b0301a2119
2 changed files with 88 additions and 0 deletions
+18
View File
@@ -0,0 +1,18 @@
K.-Michael Aye <michaelaye@users.noreply.github.com> <kmichael.aye@gmail.com>
Nelson Brown <nelson.brown@gmail.com> <nelson.a.brown@nasa.gov>
Luis Pedro Coelho <luis@luispedro.org> <lpc@cmu.edu>
Marianne Corvellec <marianne.corvellec@ens-lyon.org> <mcorvellec@april.org>
Riaan van den Dool <riaanvddool@gmail.com> <rvddool@csir.co.za>
Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org> <emma@aleph.(none)> <gouillar@epsilon.(none)> <emmanuelle.gouillart@nsup.org> <gouillar@aleph.(none)>
Thouis (Ray) Jones <thouis@gmail.com> <thouis@seas.harvard.edu>
Gregory R. Lee <gregory.lee@cchmc.org> <grlee77@gmail.com>
Andreas Mueller <amueller@ais.uni-bonn.de> <andreas@wuerl.net>
Juan Nunez-Iglesias <juan.n@unimelb.edu.au> <jni.soma@gmail.com> <jni@janelia.hhmi.org>
Nicolas Pinto <pinto@alum.mit.edu> <nicolas.pinto@gmail.com>
Johannes Schönberger <jsch@demuc.de> <ahojnnes@users.noreply.github.com> <hannesschoenberger@gmail.com> <jschoenberger@demuc.de>
Tim Sheerman-Chase <tim2009@sheerman-chase.org.uk> <t.sheerman-chase@surrey.ac.uk>
Matthew Trentacoste <trentaco@adobe.com> <web@matttrent.com>
James Turner <jturner@gemini.edu> <jehturner@yahoo.co.uk>
Stefan van der Walt <stefanv@berkeley.edu> <stefan@sun.ac.za> <github@mentat.za.net> <sjvdwalt@gmail.com>
John Wiggins <jwiggins@enthought.com> <john.wiggins@xfel.eu>
Tony S Yu <tyu@tony-yus-macbook.local> <tsyu80@gmail.com>
+70
View File
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# Requires package 'editdistance'
# A mailmap file is used (by GitHub and other tools) to associate multiple
# commit emails with one user. This helps to count number of commits,
# contributors, etc.
import subprocess
import shlex
import numpy as np
from collections import defaultdict
from editdistance import eval as dist
# Two author names whose edit distance is strictly below this value are
# treated as spellings of the same name when duplicate authors are merged.
threshold = 5
def call(cmd):
    """Run a command line and return its standard output as a list of lines.

    Parameters
    ----------
    cmd : str
        Command line; it is tokenised with ``shlex.split`` and executed
        directly, without going through a shell.

    Returns
    -------
    list of str
        The decoded output split on newline characters.  Output that ends
        with a newline therefore produces a trailing empty string.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status.
    """
    argv = shlex.split(cmd)
    output = subprocess.check_output(argv, universal_newlines=True)
    return output.split('\n')
def _clean_email(email):
if not '@' in email:
return
name, domain = email.split('@')
name = name.split('+', 1)[0]
return '{}@{}'.format(name, domain).lower()
# Delete any existing mailmap before reading the log.
# NOTE(review): presumably this is so that `%aN`/`%aE` report the raw commit
# values rather than names already rewritten by a previous mailmap -- confirm.
call("rm -f .mailmap")

authors = call("git log --format='%aN::%aE'")

# Keep the first name seen for every distinct commit e-mail address.
names, emails = [], []
seen_emails = set()  # O(1) membership test instead of scanning `emails`
for author in authors:
    if not author.strip():
        continue
    # Split on the last '::' only, so a name containing '::' still unpacks.
    name, email = author.rsplit('::', 1)
    if email not in seen_emails:
        seen_emails.add(email)
        names.append(name)
        emails.append(email)

# Pairwise edit distance between names.  Only the strict lower triangle is
# filled in; every other cell stays at infinity so it never passes the
# threshold test below.  (`np.inf` is used because the `np.infty` alias was
# removed in NumPy 2.0.)
N = len(names)
D = np.full((N, N), np.inf)
for i in range(1, N):
    for j in range(i):
        D[i, j] = dist(names[i], names[j])

# Give every later name that is within `threshold` edits of name i the
# spelling of name i, so near-identical spellings collapse onto one key.
for i in range(N):
    dupes, = np.where(D[:, i] < threshold)
    for j in dupes:
        names[j] = names[i]

# Group the normalised addresses by (merged) author name.
mailmap = defaultdict(set)
for name, email in zip(names, emails):
    email = _clean_email(email)
    if email:
        mailmap[name].add(email)

# Only authors with at least two addresses need a mailmap entry; single-word
# names are dropped as well.
mailmap = {
    name: addresses
    for name, addresses in mailmap.items()
    if len(addresses) >= 2 and len(name.split()) >= 2
}

# One line per author: the name followed by each address in angle brackets.
# Addresses are sorted so that the output does not depend on set iteration
# order and is identical from run to run.
entries = [
    [name] + ['<{}>'.format(address) for address in sorted(addresses)]
    for name, addresses in mailmap.items()
]

# Order the lines by the last word of the author name.
entries.sort(key=lambda entry: entry[0].split()[-1])

for entry in entries:
    print(' '.join(entry))